{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Use Evolution Strategy to Play BipedalWalker-v3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import sys\n",
    "import logging\n",
    "import itertools\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import gym\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "09:04:46 [INFO] env: <BipedalWalker<BipedalWalker-v3>>\n",
      "09:04:46 [INFO] action_space: Box(-1.0, 1.0, (4,), float32)\n",
      "09:04:46 [INFO] observation_space: Box(-inf, inf, (24,), float32)\n",
      "09:04:46 [INFO] reward_range: (-inf, inf)\n",
      "09:04:46 [INFO] metadata: {'render.modes': ['human', 'rgb_array'], 'video.frames_per_second': 50}\n",
      "09:04:46 [INFO] _max_episode_steps: 1600\n",
      "09:04:46 [INFO] _elapsed_steps: None\n",
      "09:04:46 [INFO] id: BipedalWalker-v3\n",
      "09:04:46 [INFO] entry_point: gym.envs.box2d:BipedalWalker\n",
      "09:04:46 [INFO] reward_threshold: 300\n",
      "09:04:46 [INFO] nondeterministic: False\n",
      "09:04:46 [INFO] max_episode_steps: 1600\n",
      "09:04:46 [INFO] _kwargs: {}\n",
      "09:04:46 [INFO] _env_name: BipedalWalker\n"
     ]
    }
   ],
   "source": [
    "env = gym.make('BipedalWalker-v3')\n",
    "env.seed(0)\n",
    "for key in vars(env):\n",
    "    logging.info('%s: %s', key, vars(env)[key])\n",
    "for key in vars(env.spec):\n",
    "    logging.info('%s: %s', key, vars(env.spec)[key])\n",
    "\n",
    "def clip_reward(reward):\n",
    "    return np.clip(reward, -1., 1.)\n",
    "reward_clipped_env = gym.wrappers.TransformReward(env, clip_reward)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    observation, reward, done = env.reset(), 0., False\n",
    "    agent.reset(mode=mode)\n",
    "    episode_reward, elapsed_steps = 0., 0\n",
    "    while True:\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        episode_reward += reward\n",
    "        elapsed_steps += 1\n",
    "        if max_episode_steps and elapsed_steps >= max_episode_steps:\n",
    "            break\n",
    "    agent.close()\n",
    "    return episode_reward, elapsed_steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "class ESAgent:\n",
    "    def __init__(self, env=None, weights=None, bias=None):\n",
    "        if weights is not None:\n",
    "            self.weights = weights\n",
    "        else:\n",
    "            self.weights = np.zeros((env.observation_space.shape[0],\n",
    "                    env.action_space.shape[0]))\n",
    "        if bias is not None:\n",
    "            self.bias = bias\n",
    "        else:\n",
    "            self.bias = np.zeros(env.action_space.shape[0])\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        pass\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def step(self, observation, _reward, _done):\n",
    "        action = np.matmul(observation, self.weights)\n",
    "        return action\n",
    "\n",
    "    def train(self, env, scale=0.05, learning_rate=0.2, population=16):\n",
    "        # permulate weights\n",
    "        weight_deltas = [scale * np.random.randn(*agent.weights.shape) for _ in\n",
    "                range(population)]\n",
    "        bias_deltas = [scale * np.random.randn(*agent.bias.shape) for _ in\n",
    "                range(population)]\n",
    "\n",
    "        # calculate rewards\n",
    "        agents = [ESAgent(weights=self.weights + weight_delta,\n",
    "                bias=self.bias + bias_delta) for weight_delta, bias_delta in\n",
    "                zip(weight_deltas, bias_deltas)]\n",
    "        rewards = np.array([play_episode(env, agent)[0] for agent in agents])\n",
    "\n",
    "        # standardize the rewards\n",
    "        std = rewards.std()\n",
    "        if np.isclose(std, 0):\n",
    "            coeffs = np.zeros(population)\n",
    "        else:\n",
    "            coeffs = (rewards - rewards.mean()) / std\n",
    "\n",
    "        # update\n",
    "        weight_updates = sum([coeff * weight_delta for coeff, weight_delta in\n",
    "                zip(coeffs, weight_deltas)])\n",
    "        bias_updates = sum([coeff * bias_delta for coeff, bias_delta in\n",
    "                zip(coeffs, bias_deltas)])\n",
    "        self.weights += learning_rate * weight_updates / population\n",
    "        self.bias += learning_rate * bias_updates / population\n",
    "\n",
    "\n",
    "agent = ESAgent(env=env)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "09:04:46 [INFO] ==== train & evaluate ====\n",
      "09:04:48 [DEBUG] evaluate generation 0: reward = -92.19, steps = 109\n",
      "09:04:51 [DEBUG] evaluate generation 1: reward = -92.18, steps = 111\n",
      "09:04:57 [DEBUG] evaluate generation 2: reward = -92.18, steps = 110\n",
      "09:05:02 [DEBUG] evaluate generation 3: reward = -92.05, steps = 108\n",
      "09:05:05 [DEBUG] evaluate generation 4: reward = -92.13, steps = 104\n",
      "09:05:12 [DEBUG] evaluate generation 5: reward = -92.13, steps = 104\n",
      "09:05:17 [DEBUG] evaluate generation 6: reward = -92.21, steps = 103\n",
      "09:05:21 [DEBUG] evaluate generation 7: reward = -92.01, steps = 102\n",
      "09:05:26 [DEBUG] evaluate generation 8: reward = -92.37, steps = 99\n",
      "09:05:30 [DEBUG] evaluate generation 9: reward = -92.47, steps = 97\n",
      "09:05:34 [DEBUG] evaluate generation 10: reward = -92.55, steps = 98\n",
      "09:05:37 [DEBUG] evaluate generation 11: reward = -92.52, steps = 100\n",
      "09:05:41 [DEBUG] evaluate generation 12: reward = -92.35, steps = 101\n",
      "09:05:44 [DEBUG] evaluate generation 13: reward = -92.59, steps = 97\n",
      "09:05:48 [DEBUG] evaluate generation 14: reward = -91.76, steps = 122\n",
      "09:05:51 [DEBUG] evaluate generation 15: reward = -91.93, steps = 118\n",
      "09:05:54 [DEBUG] evaluate generation 16: reward = -117.25, steps = 105\n",
      "09:05:59 [DEBUG] evaluate generation 17: reward = -117.72, steps = 120\n",
      "09:06:02 [DEBUG] evaluate generation 18: reward = -117.29, steps = 99\n",
      "09:06:06 [DEBUG] evaluate generation 19: reward = -117.01, steps = 92\n",
      "09:06:08 [DEBUG] evaluate generation 20: reward = -117.55, steps = 91\n",
      "09:06:14 [DEBUG] evaluate generation 21: reward = -117.43, steps = 99\n",
      "09:06:16 [DEBUG] evaluate generation 22: reward = -117.45, steps = 94\n",
      "09:06:20 [DEBUG] evaluate generation 23: reward = -118.17, steps = 123\n",
      "09:06:23 [DEBUG] evaluate generation 24: reward = -91.92, steps = 164\n",
      "09:06:25 [DEBUG] evaluate generation 25: reward = -92.83, steps = 184\n",
      "09:06:28 [DEBUG] evaluate generation 26: reward = -92.14, steps = 114\n",
      "09:06:30 [DEBUG] evaluate generation 27: reward = -92.15, steps = 123\n",
      "09:06:31 [DEBUG] evaluate generation 28: reward = -92.66, steps = 101\n",
      "09:06:36 [DEBUG] evaluate generation 29: reward = -92.58, steps = 98\n",
      "09:06:40 [DEBUG] evaluate generation 30: reward = -92.75, steps = 95\n",
      "09:06:42 [DEBUG] evaluate generation 31: reward = -92.85, steps = 99\n",
      "09:06:48 [DEBUG] evaluate generation 32: reward = -92.81, steps = 99\n",
      "09:06:50 [DEBUG] evaluate generation 33: reward = -92.85, steps = 98\n",
      "09:06:52 [DEBUG] evaluate generation 34: reward = -92.82, steps = 97\n",
      "09:06:54 [DEBUG] evaluate generation 35: reward = -92.77, steps = 97\n",
      "09:06:57 [DEBUG] evaluate generation 36: reward = -92.93, steps = 96\n",
      "09:06:58 [DEBUG] evaluate generation 37: reward = -93.12, steps = 94\n",
      "09:07:00 [DEBUG] evaluate generation 38: reward = -93.40, steps = 91\n",
      "09:07:02 [DEBUG] evaluate generation 39: reward = -93.22, steps = 92\n",
      "09:07:03 [DEBUG] evaluate generation 40: reward = -92.89, steps = 94\n",
      "09:07:05 [DEBUG] evaluate generation 41: reward = -92.91, steps = 92\n",
      "09:07:08 [DEBUG] evaluate generation 42: reward = -93.00, steps = 92\n",
      "09:07:10 [DEBUG] evaluate generation 43: reward = -92.96, steps = 97\n",
      "09:07:13 [DEBUG] evaluate generation 44: reward = -93.11, steps = 91\n",
      "09:07:14 [DEBUG] evaluate generation 45: reward = -93.40, steps = 91\n",
      "09:07:17 [DEBUG] evaluate generation 46: reward = -93.44, steps = 90\n",
      "09:07:20 [DEBUG] evaluate generation 47: reward = -93.40, steps = 90\n",
      "09:07:22 [DEBUG] evaluate generation 48: reward = -93.94, steps = 81\n",
      "09:07:24 [DEBUG] evaluate generation 49: reward = -93.60, steps = 87\n",
      "09:07:28 [DEBUG] evaluate generation 50: reward = -93.90, steps = 84\n",
      "09:07:29 [DEBUG] evaluate generation 51: reward = -93.31, steps = 92\n",
      "09:07:31 [DEBUG] evaluate generation 52: reward = -93.20, steps = 94\n",
      "09:07:32 [DEBUG] evaluate generation 53: reward = -93.20, steps = 98\n",
      "09:07:33 [DEBUG] evaluate generation 54: reward = -93.01, steps = 100\n",
      "09:07:35 [DEBUG] evaluate generation 55: reward = -93.30, steps = 96\n",
      "09:07:37 [DEBUG] evaluate generation 56: reward = -93.80, steps = 92\n",
      "09:07:38 [DEBUG] evaluate generation 57: reward = -94.09, steps = 93\n",
      "09:07:39 [DEBUG] evaluate generation 58: reward = -94.15, steps = 93\n",
      "09:07:41 [DEBUG] evaluate generation 59: reward = -93.38, steps = 99\n",
      "09:07:44 [DEBUG] evaluate generation 60: reward = -93.68, steps = 108\n",
      "09:07:45 [DEBUG] evaluate generation 61: reward = -93.45, steps = 110\n",
      "09:07:46 [DEBUG] evaluate generation 62: reward = -93.53, steps = 100\n",
      "09:07:47 [DEBUG] evaluate generation 63: reward = -93.35, steps = 113\n",
      "09:07:49 [DEBUG] evaluate generation 64: reward = -93.47, steps = 146\n",
      "09:07:50 [DEBUG] evaluate generation 65: reward = -93.72, steps = 101\n",
      "09:07:50 [DEBUG] evaluate generation 66: reward = -93.49, steps = 114\n",
      "09:07:51 [DEBUG] evaluate generation 67: reward = -93.46, steps = 109\n",
      "09:07:52 [DEBUG] evaluate generation 68: reward = -94.44, steps = 103\n",
      "09:07:54 [DEBUG] evaluate generation 69: reward = -93.48, steps = 117\n",
      "09:07:57 [DEBUG] evaluate generation 70: reward = -93.42, steps = 107\n",
      "09:08:01 [DEBUG] evaluate generation 71: reward = -93.52, steps = 107\n",
      "09:08:02 [DEBUG] evaluate generation 72: reward = -93.59, steps = 106\n",
      "09:08:03 [DEBUG] evaluate generation 73: reward = -93.52, steps = 102\n",
      "09:08:06 [DEBUG] evaluate generation 74: reward = -93.86, steps = 104\n",
      "09:08:07 [DEBUG] evaluate generation 75: reward = -93.55, steps = 110\n",
      "09:08:08 [DEBUG] evaluate generation 76: reward = -93.51, steps = 105\n",
      "09:08:09 [DEBUG] evaluate generation 77: reward = -93.55, steps = 110\n",
      "09:08:10 [DEBUG] evaluate generation 78: reward = -93.40, steps = 103\n",
      "09:08:11 [DEBUG] evaluate generation 79: reward = -93.30, steps = 107\n",
      "09:08:12 [DEBUG] evaluate generation 80: reward = -93.27, steps = 103\n",
      "09:08:16 [DEBUG] evaluate generation 81: reward = -93.81, steps = 106\n",
      "09:08:17 [DEBUG] evaluate generation 82: reward = -93.22, steps = 101\n",
      "09:08:19 [DEBUG] evaluate generation 83: reward = -93.60, steps = 111\n",
      "09:08:20 [DEBUG] evaluate generation 84: reward = -93.30, steps = 106\n",
      "09:08:21 [DEBUG] evaluate generation 85: reward = -93.27, steps = 112\n",
      "09:08:23 [DEBUG] evaluate generation 86: reward = -93.86, steps = 104\n",
      "09:08:24 [DEBUG] evaluate generation 87: reward = -93.71, steps = 106\n",
      "09:08:26 [DEBUG] evaluate generation 88: reward = -93.34, steps = 106\n",
      "09:08:27 [DEBUG] evaluate generation 89: reward = -92.74, steps = 108\n",
      "09:08:29 [DEBUG] evaluate generation 90: reward = -93.02, steps = 105\n",
      "09:08:30 [DEBUG] evaluate generation 91: reward = -93.10, steps = 99\n",
      "09:08:31 [DEBUG] evaluate generation 92: reward = -92.96, steps = 100\n",
      "09:08:34 [DEBUG] evaluate generation 93: reward = -92.92, steps = 110\n",
      "09:08:36 [DEBUG] evaluate generation 94: reward = -92.95, steps = 103\n",
      "09:08:38 [DEBUG] evaluate generation 95: reward = -92.93, steps = 108\n",
      "09:08:39 [DEBUG] evaluate generation 96: reward = -93.38, steps = 108\n",
      "09:08:40 [DEBUG] evaluate generation 97: reward = -93.23, steps = 114\n",
      "09:08:42 [DEBUG] evaluate generation 98: reward = -93.06, steps = 106\n",
      "09:08:43 [DEBUG] evaluate generation 99: reward = -93.39, steps = 113\n",
      "09:08:44 [DEBUG] evaluate generation 100: reward = -93.64, steps = 102\n",
      "09:08:45 [DEBUG] evaluate generation 101: reward = -93.45, steps = 113\n",
      "09:08:46 [DEBUG] evaluate generation 102: reward = -93.22, steps = 112\n",
      "09:08:47 [DEBUG] evaluate generation 103: reward = -93.26, steps = 111\n",
      "09:08:48 [DEBUG] evaluate generation 104: reward = -93.69, steps = 120\n",
      "09:08:49 [DEBUG] evaluate generation 105: reward = -93.72, steps = 108\n",
      "09:08:50 [DEBUG] evaluate generation 106: reward = -93.47, steps = 108\n",
      "09:08:52 [DEBUG] evaluate generation 107: reward = -93.52, steps = 107\n",
      "09:08:54 [DEBUG] evaluate generation 108: reward = -93.63, steps = 109\n",
      "09:08:55 [DEBUG] evaluate generation 109: reward = -93.55, steps = 96\n",
      "09:08:57 [DEBUG] evaluate generation 110: reward = -93.50, steps = 101\n",
      "09:08:58 [DEBUG] evaluate generation 111: reward = -94.32, steps = 88\n",
      "09:08:59 [DEBUG] evaluate generation 112: reward = -93.63, steps = 111\n",
      "09:09:00 [DEBUG] evaluate generation 113: reward = -93.57, steps = 115\n",
      "09:09:01 [DEBUG] evaluate generation 114: reward = -93.98, steps = 87\n",
      "09:09:03 [DEBUG] evaluate generation 115: reward = -93.91, steps = 86\n",
      "09:09:04 [DEBUG] evaluate generation 116: reward = -94.78, steps = 80\n",
      "09:09:06 [DEBUG] evaluate generation 117: reward = -95.19, steps = 79\n",
      "09:09:08 [DEBUG] evaluate generation 118: reward = -94.34, steps = 80\n",
      "09:09:09 [DEBUG] evaluate generation 119: reward = -94.30, steps = 82\n",
      "09:09:10 [DEBUG] evaluate generation 120: reward = -95.36, steps = 80\n",
      "09:09:11 [DEBUG] evaluate generation 121: reward = -94.90, steps = 87\n",
      "09:09:12 [DEBUG] evaluate generation 122: reward = -93.30, steps = 110\n",
      "09:09:14 [DEBUG] evaluate generation 123: reward = -93.71, steps = 94\n",
      "09:09:15 [DEBUG] evaluate generation 124: reward = -93.52, steps = 93\n",
      "09:09:17 [DEBUG] evaluate generation 125: reward = -93.69, steps = 92\n",
      "09:09:18 [DEBUG] evaluate generation 126: reward = -93.38, steps = 102\n",
      "09:09:20 [DEBUG] evaluate generation 127: reward = -93.50, steps = 103\n",
      "09:09:22 [DEBUG] evaluate generation 128: reward = -93.45, steps = 98\n",
      "09:09:23 [DEBUG] evaluate generation 129: reward = -93.67, steps = 94\n",
      "09:09:24 [DEBUG] evaluate generation 130: reward = -93.39, steps = 97\n",
      "09:09:25 [DEBUG] evaluate generation 131: reward = -93.71, steps = 96\n",
      "09:09:26 [DEBUG] evaluate generation 132: reward = -93.62, steps = 97\n",
      "09:09:27 [DEBUG] evaluate generation 133: reward = -93.75, steps = 92\n",
      "09:09:29 [DEBUG] evaluate generation 134: reward = -93.65, steps = 101\n",
      "09:09:30 [DEBUG] evaluate generation 135: reward = -93.76, steps = 91\n",
      "09:09:31 [DEBUG] evaluate generation 136: reward = -93.66, steps = 95\n",
      "09:09:33 [DEBUG] evaluate generation 137: reward = -93.81, steps = 89\n",
      "09:09:34 [DEBUG] evaluate generation 138: reward = -94.97, steps = 81\n",
      "09:09:35 [DEBUG] evaluate generation 139: reward = -95.02, steps = 85\n",
      "09:09:37 [DEBUG] evaluate generation 140: reward = -93.58, steps = 95\n",
      "09:09:38 [DEBUG] evaluate generation 141: reward = -94.70, steps = 83\n",
      "09:09:39 [DEBUG] evaluate generation 142: reward = -94.31, steps = 86\n",
      "09:09:40 [DEBUG] evaluate generation 143: reward = -95.02, steps = 86\n",
      "09:09:41 [DEBUG] evaluate generation 144: reward = -93.47, steps = 91\n",
      "09:09:42 [DEBUG] evaluate generation 145: reward = -93.23, steps = 102\n",
      "09:09:43 [DEBUG] evaluate generation 146: reward = -93.10, steps = 103\n",
      "09:09:44 [DEBUG] evaluate generation 147: reward = -93.12, steps = 99\n",
      "09:09:45 [DEBUG] evaluate generation 148: reward = -93.56, steps = 103\n",
      "09:09:47 [DEBUG] evaluate generation 149: reward = -93.15, steps = 102\n",
      "09:09:48 [DEBUG] evaluate generation 150: reward = -93.14, steps = 99\n",
      "09:09:50 [DEBUG] evaluate generation 151: reward = -93.07, steps = 106\n",
      "09:09:52 [DEBUG] evaluate generation 152: reward = -93.18, steps = 100\n",
      "09:09:53 [DEBUG] evaluate generation 153: reward = -93.24, steps = 100\n",
      "09:09:54 [DEBUG] evaluate generation 154: reward = -93.06, steps = 102\n",
      "09:09:55 [DEBUG] evaluate generation 155: reward = -93.26, steps = 104\n",
      "09:09:57 [DEBUG] evaluate generation 156: reward = -93.14, steps = 105\n",
      "09:09:58 [DEBUG] evaluate generation 157: reward = -93.00, steps = 113\n",
      "09:09:59 [DEBUG] evaluate generation 158: reward = -93.04, steps = 99\n",
      "09:10:00 [DEBUG] evaluate generation 159: reward = -94.67, steps = 84\n",
      "09:10:02 [DEBUG] evaluate generation 160: reward = -94.40, steps = 86\n",
      "09:10:03 [DEBUG] evaluate generation 161: reward = -94.13, steps = 85\n",
      "09:10:06 [DEBUG] evaluate generation 162: reward = -93.84, steps = 88\n",
      "09:10:08 [DEBUG] evaluate generation 163: reward = -94.31, steps = 86\n",
      "09:10:09 [DEBUG] evaluate generation 164: reward = -94.38, steps = 83\n",
      "09:10:10 [DEBUG] evaluate generation 165: reward = -94.07, steps = 87\n",
      "09:10:12 [DEBUG] evaluate generation 166: reward = -93.92, steps = 87\n",
      "09:10:13 [DEBUG] evaluate generation 167: reward = -93.11, steps = 85\n",
      "09:10:15 [DEBUG] evaluate generation 168: reward = -93.28, steps = 87\n",
      "09:10:16 [DEBUG] evaluate generation 169: reward = -93.45, steps = 79\n",
      "09:10:17 [DEBUG] evaluate generation 170: reward = -93.34, steps = 83\n",
      "09:10:19 [DEBUG] evaluate generation 171: reward = -93.42, steps = 81\n",
      "09:10:20 [DEBUG] evaluate generation 172: reward = -93.34, steps = 84\n",
      "09:10:22 [DEBUG] evaluate generation 173: reward = -93.55, steps = 76\n",
      "09:10:23 [DEBUG] evaluate generation 174: reward = -93.50, steps = 82\n",
      "09:10:24 [DEBUG] evaluate generation 175: reward = -93.31, steps = 87\n",
      "09:10:25 [DEBUG] evaluate generation 176: reward = -93.53, steps = 83\n",
      "09:10:28 [DEBUG] evaluate generation 177: reward = -94.01, steps = 81\n",
      "09:10:29 [DEBUG] evaluate generation 178: reward = -96.10, steps = 78\n",
      "09:10:31 [DEBUG] evaluate generation 179: reward = -94.68, steps = 75\n",
      "09:10:32 [DEBUG] evaluate generation 180: reward = -93.41, steps = 82\n",
      "09:10:35 [DEBUG] evaluate generation 181: reward = -94.72, steps = 79\n",
      "09:10:36 [DEBUG] evaluate generation 182: reward = -94.06, steps = 76\n",
      "09:10:37 [DEBUG] evaluate generation 183: reward = -95.32, steps = 76\n",
      "09:10:38 [DEBUG] evaluate generation 184: reward = -94.50, steps = 79\n",
      "09:10:40 [DEBUG] evaluate generation 185: reward = -93.89, steps = 81\n",
      "09:10:41 [DEBUG] evaluate generation 186: reward = -94.23, steps = 81\n",
      "09:10:42 [DEBUG] evaluate generation 187: reward = -96.12, steps = 73\n",
      "09:10:45 [DEBUG] evaluate generation 188: reward = -93.51, steps = 76\n",
      "09:10:46 [DEBUG] evaluate generation 189: reward = -93.59, steps = 79\n",
      "09:10:48 [DEBUG] evaluate generation 190: reward = -95.33, steps = 74\n",
      "09:10:49 [DEBUG] evaluate generation 191: reward = -94.28, steps = 80\n",
      "09:10:52 [DEBUG] evaluate generation 192: reward = -93.71, steps = 79\n",
      "09:10:53 [DEBUG] evaluate generation 193: reward = -93.37, steps = 84\n",
      "09:10:54 [DEBUG] evaluate generation 194: reward = -93.20, steps = 87\n",
      "09:10:57 [DEBUG] evaluate generation 195: reward = -93.46, steps = 93\n",
      "09:10:59 [DEBUG] evaluate generation 196: reward = -93.36, steps = 88\n",
      "09:11:00 [DEBUG] evaluate generation 197: reward = -93.36, steps = 82\n",
      "09:11:04 [DEBUG] evaluate generation 198: reward = -93.36, steps = 87\n",
      "09:11:07 [DEBUG] evaluate generation 199: reward = -94.35, steps = 75\n",
      "09:11:09 [DEBUG] evaluate generation 200: reward = -94.25, steps = 81\n",
      "09:11:11 [DEBUG] evaluate generation 201: reward = -94.16, steps = 84\n",
      "09:11:13 [DEBUG] evaluate generation 202: reward = -94.03, steps = 90\n",
      "09:11:15 [DEBUG] evaluate generation 203: reward = -94.06, steps = 87\n",
      "09:11:15 [DEBUG] evaluate generation 204: reward = -94.05, steps = 83\n",
      "09:11:16 [DEBUG] evaluate generation 205: reward = -93.34, steps = 83\n",
      "09:11:18 [DEBUG] evaluate generation 206: reward = -93.28, steps = 82\n",
      "09:11:20 [DEBUG] evaluate generation 207: reward = -94.33, steps = 82\n",
      "09:11:21 [DEBUG] evaluate generation 208: reward = -93.14, steps = 82\n",
      "09:11:22 [DEBUG] evaluate generation 209: reward = -93.26, steps = 83\n",
      "09:11:23 [DEBUG] evaluate generation 210: reward = -93.25, steps = 83\n",
      "09:11:24 [DEBUG] evaluate generation 211: reward = -93.13, steps = 86\n",
      "09:11:25 [DEBUG] evaluate generation 212: reward = -93.13, steps = 81\n",
      "09:11:26 [DEBUG] evaluate generation 213: reward = -93.10, steps = 85\n",
      "09:11:27 [DEBUG] evaluate generation 214: reward = -93.09, steps = 86\n",
      "09:11:29 [DEBUG] evaluate generation 215: reward = -93.16, steps = 83\n",
      "09:11:33 [DEBUG] evaluate generation 216: reward = -93.19, steps = 84\n",
      "09:11:35 [DEBUG] evaluate generation 217: reward = -93.26, steps = 83\n",
      "09:11:38 [DEBUG] evaluate generation 218: reward = -93.31, steps = 84\n",
      "09:11:39 [DEBUG] evaluate generation 219: reward = -93.28, steps = 86\n",
      "09:11:41 [DEBUG] evaluate generation 220: reward = -93.29, steps = 86\n",
      "09:11:42 [DEBUG] evaluate generation 221: reward = -93.26, steps = 85\n",
      "09:11:43 [DEBUG] evaluate generation 222: reward = -93.18, steps = 84\n",
      "09:11:45 [DEBUG] evaluate generation 223: reward = -93.17, steps = 86\n",
      "09:11:47 [DEBUG] evaluate generation 224: reward = -93.25, steps = 86\n",
      "09:11:49 [DEBUG] evaluate generation 225: reward = -93.30, steps = 87\n",
      "09:11:51 [DEBUG] evaluate generation 226: reward = -93.17, steps = 83\n",
      "09:11:53 [DEBUG] evaluate generation 227: reward = -93.30, steps = 82\n",
      "09:11:54 [DEBUG] evaluate generation 228: reward = -93.32, steps = 80\n",
      "09:11:55 [DEBUG] evaluate generation 229: reward = -93.29, steps = 80\n",
      "09:11:57 [DEBUG] evaluate generation 230: reward = -93.40, steps = 80\n",
      "09:11:59 [DEBUG] evaluate generation 231: reward = -93.35, steps = 80\n",
      "09:12:00 [DEBUG] evaluate generation 232: reward = -93.59, steps = 76\n",
      "09:12:02 [DEBUG] evaluate generation 233: reward = -94.33, steps = 76\n",
      "09:12:04 [DEBUG] evaluate generation 234: reward = -94.33, steps = 69\n",
      "09:12:06 [DEBUG] evaluate generation 235: reward = -94.56, steps = 67\n",
      "09:12:07 [DEBUG] evaluate generation 236: reward = -94.49, steps = 68\n",
      "09:12:09 [DEBUG] evaluate generation 237: reward = -94.84, steps = 66\n",
      "09:12:11 [DEBUG] evaluate generation 238: reward = -94.74, steps = 67\n",
      "09:12:13 [DEBUG] evaluate generation 239: reward = -94.58, steps = 67\n",
      "09:12:14 [DEBUG] evaluate generation 240: reward = -94.53, steps = 64\n",
      "09:12:15 [DEBUG] evaluate generation 241: reward = -94.45, steps = 64\n",
      "09:12:16 [DEBUG] evaluate generation 242: reward = -94.79, steps = 66\n",
      "09:12:18 [DEBUG] evaluate generation 243: reward = -94.48, steps = 65\n",
      "09:12:20 [DEBUG] evaluate generation 244: reward = -94.87, steps = 62\n",
      "09:12:21 [DEBUG] evaluate generation 245: reward = -94.68, steps = 67\n",
      "09:12:22 [DEBUG] evaluate generation 246: reward = -94.39, steps = 69\n",
      "09:12:24 [DEBUG] evaluate generation 247: reward = -94.81, steps = 66\n",
      "09:12:25 [DEBUG] evaluate generation 248: reward = -94.66, steps = 71\n",
      "09:12:26 [DEBUG] evaluate generation 249: reward = -94.63, steps = 70\n",
      "09:12:27 [DEBUG] evaluate generation 250: reward = -94.36, steps = 75\n",
      "09:12:29 [DEBUG] evaluate generation 251: reward = -94.56, steps = 73\n",
      "09:12:30 [DEBUG] evaluate generation 252: reward = -94.68, steps = 72\n",
      "09:12:32 [DEBUG] evaluate generation 253: reward = -94.51, steps = 71\n",
      "09:12:33 [DEBUG] evaluate generation 254: reward = -94.52, steps = 76\n",
      "09:12:34 [DEBUG] evaluate generation 255: reward = -94.28, steps = 82\n",
      "09:12:36 [DEBUG] evaluate generation 256: reward = -94.60, steps = 76\n",
      "09:12:37 [DEBUG] evaluate generation 257: reward = -94.30, steps = 85\n",
      "09:12:39 [DEBUG] evaluate generation 258: reward = -94.34, steps = 82\n",
      "09:12:40 [DEBUG] evaluate generation 259: reward = -94.05, steps = 81\n",
      "09:12:41 [DEBUG] evaluate generation 260: reward = -94.07, steps = 90\n",
      "09:12:43 [DEBUG] evaluate generation 261: reward = -94.04, steps = 82\n",
      "09:12:44 [DEBUG] evaluate generation 262: reward = -93.91, steps = 84\n",
      "09:12:45 [DEBUG] evaluate generation 263: reward = -93.77, steps = 89\n",
      "09:12:47 [DEBUG] evaluate generation 264: reward = -93.95, steps = 87\n",
      "09:12:48 [DEBUG] evaluate generation 265: reward = -94.09, steps = 85\n",
      "09:12:50 [DEBUG] evaluate generation 266: reward = -94.12, steps = 82\n",
      "09:12:51 [DEBUG] evaluate generation 267: reward = -93.60, steps = 75\n",
      "09:12:54 [DEBUG] evaluate generation 268: reward = -93.70, steps = 78\n",
      "09:12:56 [DEBUG] evaluate generation 269: reward = -94.27, steps = 79\n",
      "09:12:57 [DEBUG] evaluate generation 270: reward = -93.93, steps = 87\n",
      "09:12:58 [DEBUG] evaluate generation 271: reward = -93.84, steps = 91\n",
      "09:13:00 [DEBUG] evaluate generation 272: reward = -94.55, steps = 97\n",
      "09:13:01 [DEBUG] evaluate generation 273: reward = -93.79, steps = 87\n",
      "09:13:02 [DEBUG] evaluate generation 274: reward = -94.01, steps = 87\n",
      "09:13:04 [DEBUG] evaluate generation 275: reward = -94.07, steps = 83\n",
      "09:13:06 [DEBUG] evaluate generation 276: reward = -93.90, steps = 81\n",
      "09:13:08 [DEBUG] evaluate generation 277: reward = -94.24, steps = 76\n",
      "09:13:10 [DEBUG] evaluate generation 278: reward = -93.93, steps = 76\n",
      "09:13:11 [DEBUG] evaluate generation 279: reward = -94.23, steps = 70\n",
      "09:13:12 [DEBUG] evaluate generation 280: reward = -94.66, steps = 69\n",
      "09:13:14 [DEBUG] evaluate generation 281: reward = -94.63, steps = 67\n",
      "09:13:16 [DEBUG] evaluate generation 282: reward = -94.63, steps = 65\n",
      "09:13:17 [DEBUG] evaluate generation 283: reward = -94.63, steps = 67\n",
      "09:13:21 [DEBUG] evaluate generation 284: reward = -94.73, steps = 65\n",
      "09:13:22 [DEBUG] evaluate generation 285: reward = -94.48, steps = 73\n",
      "09:13:24 [DEBUG] evaluate generation 286: reward = -94.45, steps = 72\n",
      "09:13:25 [DEBUG] evaluate generation 287: reward = -94.59, steps = 69\n",
      "09:13:26 [DEBUG] evaluate generation 288: reward = -94.73, steps = 88\n",
      "09:13:28 [DEBUG] evaluate generation 289: reward = -94.05, steps = 73\n",
      "09:13:31 [DEBUG] evaluate generation 290: reward = -94.36, steps = 74\n",
      "09:13:35 [DEBUG] evaluate generation 291: reward = -94.20, steps = 75\n",
      "09:13:39 [DEBUG] evaluate generation 292: reward = -94.44, steps = 74\n",
      "09:13:40 [DEBUG] evaluate generation 293: reward = -94.15, steps = 72\n",
      "09:13:41 [DEBUG] evaluate generation 294: reward = -94.27, steps = 74\n",
      "09:13:44 [DEBUG] evaluate generation 295: reward = -94.02, steps = 73\n",
      "09:13:46 [DEBUG] evaluate generation 296: reward = -94.09, steps = 74\n",
      "09:13:48 [DEBUG] evaluate generation 297: reward = -94.39, steps = 82\n",
      "09:13:49 [DEBUG] evaluate generation 298: reward = -94.40, steps = 75\n",
      "09:13:51 [DEBUG] evaluate generation 299: reward = -94.82, steps = 80\n",
      "09:13:52 [DEBUG] evaluate generation 300: reward = -94.89, steps = 87\n",
      "09:13:54 [DEBUG] evaluate generation 301: reward = -94.12, steps = 76\n",
      "09:13:57 [DEBUG] evaluate generation 302: reward = -93.96, steps = 74\n",
      "09:13:59 [DEBUG] evaluate generation 303: reward = -94.09, steps = 77\n",
      "09:14:01 [DEBUG] evaluate generation 304: reward = -94.90, steps = 81\n",
      "09:14:02 [DEBUG] evaluate generation 305: reward = -94.49, steps = 72\n",
      "09:14:04 [DEBUG] evaluate generation 306: reward = -94.92, steps = 80\n",
      "09:14:05 [DEBUG] evaluate generation 307: reward = -94.87, steps = 79\n",
      "09:14:06 [DEBUG] evaluate generation 308: reward = -94.31, steps = 69\n",
      "09:14:07 [DEBUG] evaluate generation 309: reward = -94.97, steps = 81\n",
      "09:14:09 [DEBUG] evaluate generation 310: reward = -94.47, steps = 72\n",
      "09:14:11 [DEBUG] evaluate generation 311: reward = -94.59, steps = 69\n",
      "09:14:12 [DEBUG] evaluate generation 312: reward = -94.59, steps = 70\n",
      "09:14:13 [DEBUG] evaluate generation 313: reward = -95.01, steps = 72\n",
      "09:14:15 [DEBUG] evaluate generation 314: reward = -95.05, steps = 77\n",
      "09:14:16 [DEBUG] evaluate generation 315: reward = -93.92, steps = 75\n",
      "09:14:18 [DEBUG] evaluate generation 316: reward = -93.84, steps = 76\n",
      "09:14:19 [DEBUG] evaluate generation 317: reward = -93.55, steps = 75\n",
      "09:14:20 [DEBUG] evaluate generation 318: reward = -93.72, steps = 77\n",
      "09:14:23 [DEBUG] evaluate generation 319: reward = -94.34, steps = 70\n",
      "09:14:25 [DEBUG] evaluate generation 320: reward = -94.10, steps = 69\n",
      "09:14:26 [DEBUG] evaluate generation 321: reward = -94.64, steps = 70\n",
      "09:14:28 [DEBUG] evaluate generation 322: reward = -94.51, steps = 69\n",
      "09:14:29 [DEBUG] evaluate generation 323: reward = -95.15, steps = 73\n",
      "09:14:32 [DEBUG] evaluate generation 324: reward = -94.91, steps = 64\n",
      "09:14:33 [DEBUG] evaluate generation 325: reward = -94.88, steps = 67\n",
      "09:14:34 [DEBUG] evaluate generation 326: reward = -94.24, steps = 69\n",
      "09:14:35 [DEBUG] evaluate generation 327: reward = -94.43, steps = 68\n",
      "09:14:38 [DEBUG] evaluate generation 328: reward = -94.69, steps = 66\n",
      "09:14:41 [DEBUG] evaluate generation 329: reward = -94.50, steps = 69\n",
      "09:14:42 [DEBUG] evaluate generation 330: reward = -94.38, steps = 71\n",
      "09:14:43 [DEBUG] evaluate generation 331: reward = -94.44, steps = 73\n",
      "09:14:45 [DEBUG] evaluate generation 332: reward = -94.30, steps = 70\n",
      "09:14:48 [DEBUG] evaluate generation 333: reward = -94.40, steps = 70\n",
      "09:14:49 [DEBUG] evaluate generation 334: reward = -94.43, steps = 72\n",
      "09:14:50 [DEBUG] evaluate generation 335: reward = -94.28, steps = 69\n",
      "09:14:52 [DEBUG] evaluate generation 336: reward = -94.56, steps = 73\n",
      "09:14:53 [DEBUG] evaluate generation 337: reward = -94.59, steps = 91\n",
      "09:14:55 [DEBUG] evaluate generation 338: reward = -94.32, steps = 69\n",
      "09:14:57 [DEBUG] evaluate generation 339: reward = -94.29, steps = 75\n",
      "09:14:58 [DEBUG] evaluate generation 340: reward = -94.55, steps = 88\n",
      "09:15:00 [DEBUG] evaluate generation 341: reward = -94.14, steps = 79\n",
      "09:15:01 [DEBUG] evaluate generation 342: reward = -94.04, steps = 84\n",
      "09:15:02 [DEBUG] evaluate generation 343: reward = -93.53, steps = 75\n",
      "09:15:04 [DEBUG] evaluate generation 344: reward = -93.71, steps = 76\n",
      "09:15:06 [DEBUG] evaluate generation 345: reward = -93.88, steps = 80\n",
      "09:15:07 [DEBUG] evaluate generation 346: reward = -93.81, steps = 79\n",
      "09:15:08 [DEBUG] evaluate generation 347: reward = -93.40, steps = 75\n",
      "09:15:11 [DEBUG] evaluate generation 348: reward = -93.70, steps = 88\n",
      "09:15:13 [DEBUG] evaluate generation 349: reward = -93.63, steps = 84\n",
      "09:15:15 [DEBUG] evaluate generation 350: reward = -93.77, steps = 86\n",
      "09:15:16 [DEBUG] evaluate generation 351: reward = -93.78, steps = 81\n",
      "09:15:18 [DEBUG] evaluate generation 352: reward = -92.77, steps = 108\n",
      "09:15:21 [DEBUG] evaluate generation 353: reward = -93.76, steps = 77\n",
      "09:15:22 [DEBUG] evaluate generation 354: reward = -92.61, steps = 107\n",
      "09:15:24 [DEBUG] evaluate generation 355: reward = -93.71, steps = 80\n",
      "09:15:25 [DEBUG] evaluate generation 356: reward = -94.09, steps = 73\n",
      "09:15:26 [DEBUG] evaluate generation 357: reward = -93.82, steps = 83\n",
      "09:15:28 [DEBUG] evaluate generation 358: reward = -93.81, steps = 79\n",
      "09:15:29 [DEBUG] evaluate generation 359: reward = -93.66, steps = 88\n",
      "09:15:30 [DEBUG] evaluate generation 360: reward = -93.75, steps = 90\n",
      "09:15:31 [DEBUG] evaluate generation 361: reward = -117.08, steps = 95\n",
      "09:15:32 [DEBUG] evaluate generation 362: reward = -93.22, steps = 82\n",
      "09:15:35 [DEBUG] evaluate generation 363: reward = -93.43, steps = 89\n",
      "09:15:36 [DEBUG] evaluate generation 364: reward = -93.58, steps = 92\n",
      "09:15:37 [DEBUG] evaluate generation 365: reward = -93.54, steps = 88\n",
      "09:15:38 [DEBUG] evaluate generation 366: reward = -93.37, steps = 98\n",
      "09:15:39 [DEBUG] evaluate generation 367: reward = -93.53, steps = 79\n",
      "09:15:40 [DEBUG] evaluate generation 368: reward = -93.27, steps = 87\n",
      "09:15:41 [DEBUG] evaluate generation 369: reward = -93.69, steps = 75\n",
      "09:15:42 [DEBUG] evaluate generation 370: reward = -93.39, steps = 85\n",
      "09:15:44 [DEBUG] evaluate generation 371: reward = -92.95, steps = 77\n",
      "09:15:45 [DEBUG] evaluate generation 372: reward = -93.33, steps = 83\n",
      "09:15:48 [DEBUG] evaluate generation 373: reward = -93.26, steps = 81\n",
      "09:15:49 [DEBUG] evaluate generation 374: reward = -93.27, steps = 82\n",
      "09:15:51 [DEBUG] evaluate generation 375: reward = -93.48, steps = 85\n",
      "09:15:53 [DEBUG] evaluate generation 376: reward = -93.31, steps = 76\n",
      "09:15:54 [DEBUG] evaluate generation 377: reward = -93.36, steps = 88\n",
      "09:15:56 [DEBUG] evaluate generation 378: reward = -93.63, steps = 83\n",
      "09:15:57 [DEBUG] evaluate generation 379: reward = -93.76, steps = 80\n",
      "09:15:59 [DEBUG] evaluate generation 380: reward = -93.64, steps = 79\n",
      "09:16:00 [DEBUG] evaluate generation 381: reward = -93.60, steps = 88\n",
      "09:16:01 [DEBUG] evaluate generation 382: reward = -93.57, steps = 83\n",
      "09:16:02 [DEBUG] evaluate generation 383: reward = -93.29, steps = 80\n",
      "09:16:04 [DEBUG] evaluate generation 384: reward = -92.77, steps = 95\n",
      "09:16:06 [DEBUG] evaluate generation 385: reward = -93.52, steps = 89\n",
      "09:16:08 [DEBUG] evaluate generation 386: reward = -92.78, steps = 93\n",
      "09:16:13 [DEBUG] evaluate generation 387: reward = -92.76, steps = 94\n",
      "09:16:15 [DEBUG] evaluate generation 388: reward = -92.85, steps = 94\n",
      "09:16:16 [DEBUG] evaluate generation 389: reward = -92.76, steps = 94\n",
      "09:16:17 [DEBUG] evaluate generation 390: reward = -92.71, steps = 94\n",
      "09:16:18 [DEBUG] evaluate generation 391: reward = -92.75, steps = 95\n",
      "09:16:19 [DEBUG] evaluate generation 392: reward = -92.80, steps = 92\n",
      "09:16:20 [DEBUG] evaluate generation 393: reward = -92.42, steps = 96\n",
      "09:16:24 [DEBUG] evaluate generation 394: reward = -92.59, steps = 93\n",
      "09:16:27 [DEBUG] evaluate generation 395: reward = -92.83, steps = 92\n",
      "09:16:30 [DEBUG] evaluate generation 396: reward = -92.98, steps = 93\n",
      "09:16:32 [DEBUG] evaluate generation 397: reward = -93.38, steps = 90\n",
      "09:16:33 [DEBUG] evaluate generation 398: reward = -92.91, steps = 94\n",
      "09:16:35 [DEBUG] evaluate generation 399: reward = -92.68, steps = 95\n",
      "09:16:39 [DEBUG] evaluate generation 400: reward = -92.78, steps = 97\n",
      "09:16:41 [DEBUG] evaluate generation 401: reward = -92.90, steps = 95\n",
      "09:16:42 [DEBUG] evaluate generation 402: reward = -92.72, steps = 96\n",
      "09:16:43 [DEBUG] evaluate generation 403: reward = -92.68, steps = 96\n",
      "09:16:46 [DEBUG] evaluate generation 404: reward = -92.57, steps = 99\n",
      "09:16:48 [DEBUG] evaluate generation 405: reward = -92.75, steps = 94\n",
      "09:16:51 [DEBUG] evaluate generation 406: reward = -92.77, steps = 95\n",
      "09:16:54 [DEBUG] evaluate generation 407: reward = -93.35, steps = 85\n",
      "09:16:55 [DEBUG] evaluate generation 408: reward = -92.97, steps = 83\n",
      "09:16:59 [DEBUG] evaluate generation 409: reward = -94.03, steps = 70\n",
      "09:17:00 [DEBUG] evaluate generation 410: reward = -92.99, steps = 85\n",
      "09:17:02 [DEBUG] evaluate generation 411: reward = -93.22, steps = 84\n",
      "09:17:04 [DEBUG] evaluate generation 412: reward = -93.40, steps = 80\n",
      "09:17:05 [DEBUG] evaluate generation 413: reward = -93.25, steps = 82\n",
      "09:17:06 [DEBUG] evaluate generation 414: reward = -92.95, steps = 85\n",
      "09:17:08 [DEBUG] evaluate generation 415: reward = -92.88, steps = 85\n",
      "09:17:11 [DEBUG] evaluate generation 416: reward = -93.56, steps = 79\n",
      "09:17:12 [DEBUG] evaluate generation 417: reward = -93.45, steps = 72\n",
      "09:17:15 [DEBUG] evaluate generation 418: reward = -93.82, steps = 69\n",
      "09:17:18 [DEBUG] evaluate generation 419: reward = -92.28, steps = 103\n",
      "09:17:19 [DEBUG] evaluate generation 420: reward = -93.50, steps = 71\n",
      "09:17:20 [DEBUG] evaluate generation 421: reward = -94.06, steps = 70\n",
      "09:17:22 [DEBUG] evaluate generation 422: reward = -92.73, steps = 84\n",
      "09:17:24 [DEBUG] evaluate generation 423: reward = -93.43, steps = 69\n",
      "09:17:25 [DEBUG] evaluate generation 424: reward = -93.74, steps = 69\n",
      "09:17:27 [DEBUG] evaluate generation 425: reward = -93.33, steps = 70\n",
      "09:17:29 [DEBUG] evaluate generation 426: reward = -93.12, steps = 70\n",
      "09:17:30 [DEBUG] evaluate generation 427: reward = -92.11, steps = 93\n",
      "09:17:33 [DEBUG] evaluate generation 428: reward = -92.69, steps = 70\n",
      "09:17:34 [DEBUG] evaluate generation 429: reward = -92.62, steps = 92\n",
      "09:17:40 [DEBUG] evaluate generation 430: reward = -92.66, steps = 72\n",
      "09:17:42 [DEBUG] evaluate generation 431: reward = -92.53, steps = 103\n",
      "09:17:44 [DEBUG] evaluate generation 432: reward = -91.80, steps = 97\n",
      "09:17:45 [DEBUG] evaluate generation 433: reward = -93.06, steps = 80\n",
      "09:17:47 [DEBUG] evaluate generation 434: reward = -93.38, steps = 75\n",
      "09:17:48 [DEBUG] evaluate generation 435: reward = -92.84, steps = 84\n",
      "09:17:49 [DEBUG] evaluate generation 436: reward = -91.22, steps = 84\n",
      "09:17:51 [DEBUG] evaluate generation 437: reward = -92.25, steps = 82\n",
      "09:17:53 [DEBUG] evaluate generation 438: reward = -91.76, steps = 82\n",
      "09:17:57 [DEBUG] evaluate generation 439: reward = -92.84, steps = 99\n",
      "09:17:59 [DEBUG] evaluate generation 440: reward = -91.27, steps = 79\n",
      "09:18:04 [DEBUG] evaluate generation 441: reward = -91.52, steps = 82\n",
      "09:18:07 [DEBUG] evaluate generation 442: reward = -92.55, steps = 77\n",
      "09:18:09 [DEBUG] evaluate generation 443: reward = -93.59, steps = 67\n",
      "09:18:12 [DEBUG] evaluate generation 444: reward = -93.02, steps = 74\n",
      "09:18:14 [DEBUG] evaluate generation 445: reward = -92.12, steps = 83\n",
      "09:18:15 [DEBUG] evaluate generation 446: reward = -92.67, steps = 101\n",
      "09:18:16 [DEBUG] evaluate generation 447: reward = -93.35, steps = 79\n",
      "09:18:19 [DEBUG] evaluate generation 448: reward = -93.64, steps = 71\n",
      "09:18:21 [DEBUG] evaluate generation 449: reward = -92.79, steps = 82\n",
      "09:18:23 [DEBUG] evaluate generation 450: reward = -92.95, steps = 79\n",
      "09:18:24 [DEBUG] evaluate generation 451: reward = -93.73, steps = 72\n",
      "09:18:26 [DEBUG] evaluate generation 452: reward = -92.27, steps = 81\n",
      "09:18:28 [DEBUG] evaluate generation 453: reward = -92.63, steps = 82\n",
      "09:18:31 [DEBUG] evaluate generation 454: reward = -92.38, steps = 81\n",
      "09:18:32 [DEBUG] evaluate generation 455: reward = -92.63, steps = 79\n",
      "09:18:33 [DEBUG] evaluate generation 456: reward = -92.59, steps = 74\n",
      "09:18:34 [DEBUG] evaluate generation 457: reward = -91.19, steps = 80\n",
      "09:18:39 [DEBUG] evaluate generation 458: reward = -91.76, steps = 79\n",
      "09:18:40 [DEBUG] evaluate generation 459: reward = -91.79, steps = 79\n",
      "09:18:42 [DEBUG] evaluate generation 460: reward = -90.60, steps = 101\n",
      "09:18:44 [DEBUG] evaluate generation 461: reward = -91.91, steps = 78\n",
      "09:18:46 [DEBUG] evaluate generation 462: reward = -91.98, steps = 77\n",
      "09:18:50 [DEBUG] evaluate generation 463: reward = -91.77, steps = 76\n",
      "09:18:52 [DEBUG] evaluate generation 464: reward = -91.98, steps = 75\n",
      "09:18:53 [DEBUG] evaluate generation 465: reward = -92.38, steps = 76\n",
      "09:18:55 [DEBUG] evaluate generation 466: reward = -92.31, steps = 79\n",
      "09:18:57 [DEBUG] evaluate generation 467: reward = -92.38, steps = 80\n",
      "09:19:01 [DEBUG] evaluate generation 468: reward = -92.94, steps = 73\n",
      "09:19:03 [DEBUG] evaluate generation 469: reward = -93.50, steps = 70\n",
      "09:19:04 [DEBUG] evaluate generation 470: reward = -93.12, steps = 72\n",
      "09:19:05 [DEBUG] evaluate generation 471: reward = -92.72, steps = 73\n",
      "09:19:06 [DEBUG] evaluate generation 472: reward = -93.05, steps = 69\n",
      "09:19:10 [DEBUG] evaluate generation 473: reward = -92.52, steps = 75\n",
      "09:19:11 [DEBUG] evaluate generation 474: reward = -92.37, steps = 75\n",
      "09:19:11 [DEBUG] evaluate generation 475: reward = -92.66, steps = 78\n",
      "09:19:14 [DEBUG] evaluate generation 476: reward = -93.01, steps = 79\n",
      "09:19:15 [DEBUG] evaluate generation 477: reward = -91.85, steps = 76\n",
      "09:19:17 [DEBUG] evaluate generation 478: reward = -92.21, steps = 77\n",
      "09:19:19 [DEBUG] evaluate generation 479: reward = -92.02, steps = 75\n",
      "09:19:20 [DEBUG] evaluate generation 480: reward = -92.41, steps = 84\n",
      "09:19:22 [DEBUG] evaluate generation 481: reward = -92.30, steps = 77\n",
      "09:19:25 [DEBUG] evaluate generation 482: reward = -92.63, steps = 81\n",
      "09:19:26 [DEBUG] evaluate generation 483: reward = -92.68, steps = 80\n",
      "09:19:28 [DEBUG] evaluate generation 484: reward = -92.63, steps = 80\n",
      "09:19:30 [DEBUG] evaluate generation 485: reward = -91.45, steps = 78\n",
      "09:19:32 [DEBUG] evaluate generation 486: reward = -91.72, steps = 81\n",
      "09:19:33 [DEBUG] evaluate generation 487: reward = -91.68, steps = 79\n",
      "09:19:36 [DEBUG] evaluate generation 488: reward = -91.83, steps = 80\n",
      "09:19:37 [DEBUG] evaluate generation 489: reward = -91.61, steps = 81\n",
      "09:19:39 [DEBUG] evaluate generation 490: reward = -92.87, steps = 83\n",
      "09:19:40 [DEBUG] evaluate generation 491: reward = -91.89, steps = 83\n",
      "09:19:41 [DEBUG] evaluate generation 492: reward = -91.15, steps = 82\n",
      "09:19:42 [DEBUG] evaluate generation 493: reward = -91.76, steps = 81\n",
      "09:19:43 [DEBUG] evaluate generation 494: reward = -92.08, steps = 80\n",
      "09:19:45 [DEBUG] evaluate generation 495: reward = -92.42, steps = 89\n",
      "09:19:46 [DEBUG] evaluate generation 496: reward = -91.81, steps = 78\n",
      "09:19:49 [DEBUG] evaluate generation 497: reward = -91.77, steps = 77\n",
      "09:19:52 [DEBUG] evaluate generation 498: reward = -92.32, steps = 85\n",
      "09:19:53 [DEBUG] evaluate generation 499: reward = -92.34, steps = 86\n",
      "09:19:55 [DEBUG] evaluate generation 500: reward = -92.86, steps = 78\n",
      "09:19:58 [DEBUG] evaluate generation 501: reward = -93.10, steps = 74\n",
      "09:20:00 [DEBUG] evaluate generation 502: reward = -92.81, steps = 73\n",
      "09:20:01 [DEBUG] evaluate generation 503: reward = -92.31, steps = 76\n",
      "09:20:04 [DEBUG] evaluate generation 504: reward = -92.85, steps = 80\n",
      "09:20:05 [DEBUG] evaluate generation 505: reward = -92.60, steps = 76\n",
      "09:20:06 [DEBUG] evaluate generation 506: reward = -93.22, steps = 69\n",
      "09:20:09 [DEBUG] evaluate generation 507: reward = -93.42, steps = 72\n",
      "09:20:10 [DEBUG] evaluate generation 508: reward = -93.05, steps = 75\n",
      "09:20:11 [DEBUG] evaluate generation 509: reward = -93.55, steps = 69\n",
      "09:20:12 [DEBUG] evaluate generation 510: reward = -92.70, steps = 84\n",
      "09:20:14 [DEBUG] evaluate generation 511: reward = -92.84, steps = 79\n",
      "09:20:15 [DEBUG] evaluate generation 512: reward = -92.54, steps = 81\n",
      "09:20:17 [DEBUG] evaluate generation 513: reward = -91.93, steps = 82\n",
      "09:20:19 [DEBUG] evaluate generation 514: reward = -92.57, steps = 83\n",
      "09:20:22 [DEBUG] evaluate generation 515: reward = -92.23, steps = 80\n",
      "09:20:26 [DEBUG] evaluate generation 516: reward = -92.23, steps = 79\n",
      "09:20:29 [DEBUG] evaluate generation 517: reward = -92.55, steps = 84\n",
      "09:20:30 [DEBUG] evaluate generation 518: reward = -92.62, steps = 85\n",
      "09:20:31 [DEBUG] evaluate generation 519: reward = -92.51, steps = 84\n",
      "09:20:32 [DEBUG] evaluate generation 520: reward = -92.21, steps = 96\n",
      "09:20:35 [DEBUG] evaluate generation 521: reward = -92.43, steps = 86\n",
      "09:20:37 [DEBUG] evaluate generation 522: reward = -92.81, steps = 90\n",
      "09:20:39 [DEBUG] evaluate generation 523: reward = -92.22, steps = 87\n",
      "09:20:41 [DEBUG] evaluate generation 524: reward = -93.37, steps = 82\n",
      "09:20:43 [DEBUG] evaluate generation 525: reward = -93.12, steps = 80\n",
      "09:20:44 [DEBUG] evaluate generation 526: reward = -92.78, steps = 84\n",
      "09:20:46 [DEBUG] evaluate generation 527: reward = -93.56, steps = 73\n",
      "09:20:49 [DEBUG] evaluate generation 528: reward = -93.33, steps = 80\n",
      "09:20:53 [DEBUG] evaluate generation 529: reward = -94.00, steps = 73\n",
      "09:20:56 [DEBUG] evaluate generation 530: reward = -93.86, steps = 71\n",
      "09:20:57 [DEBUG] evaluate generation 531: reward = -93.99, steps = 72\n",
      "09:20:59 [DEBUG] evaluate generation 532: reward = -93.83, steps = 70\n",
      "09:21:00 [DEBUG] evaluate generation 533: reward = -93.82, steps = 70\n",
      "09:21:03 [DEBUG] evaluate generation 534: reward = -93.87, steps = 71\n",
      "09:21:05 [DEBUG] evaluate generation 535: reward = -94.05, steps = 68\n",
      "09:21:08 [DEBUG] evaluate generation 536: reward = -94.04, steps = 71\n",
      "09:21:10 [DEBUG] evaluate generation 537: reward = -94.18, steps = 66\n",
      "09:21:11 [DEBUG] evaluate generation 538: reward = -94.45, steps = 69\n",
      "09:21:12 [DEBUG] evaluate generation 539: reward = -93.48, steps = 78\n",
      "09:21:13 [DEBUG] evaluate generation 540: reward = -93.08, steps = 81\n",
      "09:21:14 [DEBUG] evaluate generation 541: reward = -92.62, steps = 89\n",
      "09:21:16 [DEBUG] evaluate generation 542: reward = -92.87, steps = 91\n",
      "09:21:17 [DEBUG] evaluate generation 543: reward = -93.06, steps = 86\n",
      "09:21:18 [DEBUG] evaluate generation 544: reward = -92.38, steps = 82\n",
      "09:21:20 [DEBUG] evaluate generation 545: reward = -92.39, steps = 80\n",
      "09:21:22 [DEBUG] evaluate generation 546: reward = -92.48, steps = 78\n",
      "09:21:23 [DEBUG] evaluate generation 547: reward = -92.53, steps = 83\n",
      "09:21:24 [DEBUG] evaluate generation 548: reward = -93.29, steps = 79\n",
      "09:21:27 [DEBUG] evaluate generation 549: reward = -92.88, steps = 88\n",
      "09:21:30 [DEBUG] evaluate generation 550: reward = -93.40, steps = 80\n",
      "09:21:31 [DEBUG] evaluate generation 551: reward = -92.87, steps = 88\n",
      "09:21:32 [DEBUG] evaluate generation 552: reward = -93.74, steps = 77\n",
      "09:21:34 [DEBUG] evaluate generation 553: reward = -93.55, steps = 77\n",
      "09:21:36 [DEBUG] evaluate generation 554: reward = -93.75, steps = 72\n",
      "09:21:37 [DEBUG] evaluate generation 555: reward = -93.69, steps = 75\n",
      "09:21:39 [DEBUG] evaluate generation 556: reward = -93.85, steps = 74\n",
      "09:21:41 [DEBUG] evaluate generation 557: reward = -93.88, steps = 71\n",
      "09:21:42 [DEBUG] evaluate generation 558: reward = -93.57, steps = 75\n",
      "09:21:43 [DEBUG] evaluate generation 559: reward = -92.61, steps = 82\n",
      "09:21:43 [DEBUG] evaluate generation 560: reward = -93.30, steps = 81\n",
      "09:21:47 [DEBUG] evaluate generation 561: reward = -93.80, steps = 72\n",
      "09:21:48 [DEBUG] evaluate generation 562: reward = -93.68, steps = 73\n",
      "09:21:49 [DEBUG] evaluate generation 563: reward = -93.10, steps = 76\n",
      "09:21:51 [DEBUG] evaluate generation 564: reward = -93.45, steps = 77\n",
      "09:21:53 [DEBUG] evaluate generation 565: reward = -93.95, steps = 67\n",
      "09:21:55 [DEBUG] evaluate generation 566: reward = -94.11, steps = 66\n",
      "09:21:56 [DEBUG] evaluate generation 567: reward = -93.70, steps = 70\n",
      "09:21:58 [DEBUG] evaluate generation 568: reward = -93.93, steps = 69\n",
      "09:21:59 [DEBUG] evaluate generation 569: reward = -93.74, steps = 69\n",
      "09:22:00 [DEBUG] evaluate generation 570: reward = -93.46, steps = 84\n",
      "09:22:01 [DEBUG] evaluate generation 571: reward = -93.05, steps = 86\n",
      "09:22:02 [DEBUG] evaluate generation 572: reward = -92.15, steps = 77\n",
      "09:22:03 [DEBUG] evaluate generation 573: reward = -92.05, steps = 79\n",
      "09:22:04 [DEBUG] evaluate generation 574: reward = -92.74, steps = 85\n",
      "09:22:06 [DEBUG] evaluate generation 575: reward = -91.22, steps = 79\n",
      "09:22:07 [DEBUG] evaluate generation 576: reward = -91.63, steps = 80\n",
      "09:22:08 [DEBUG] evaluate generation 577: reward = -91.12, steps = 81\n",
      "09:22:11 [DEBUG] evaluate generation 578: reward = -91.22, steps = 81\n",
      "09:22:13 [DEBUG] evaluate generation 579: reward = -91.21, steps = 81\n",
      "09:22:14 [DEBUG] evaluate generation 580: reward = -91.59, steps = 87\n",
      "09:22:18 [DEBUG] evaluate generation 581: reward = -91.98, steps = 84\n",
      "09:22:19 [DEBUG] evaluate generation 582: reward = -91.53, steps = 81\n",
      "09:22:20 [DEBUG] evaluate generation 583: reward = -91.37, steps = 78\n",
      "09:22:23 [DEBUG] evaluate generation 584: reward = -91.51, steps = 79\n",
      "09:22:26 [DEBUG] evaluate generation 585: reward = -92.39, steps = 82\n",
      "09:22:28 [DEBUG] evaluate generation 586: reward = -92.21, steps = 76\n",
      "09:22:30 [DEBUG] evaluate generation 587: reward = -92.63, steps = 72\n",
      "09:22:34 [DEBUG] evaluate generation 588: reward = -93.42, steps = 68\n",
      "09:22:35 [DEBUG] evaluate generation 589: reward = -92.18, steps = 76\n",
      "09:22:37 [DEBUG] evaluate generation 590: reward = -92.77, steps = 74\n",
      "09:22:40 [DEBUG] evaluate generation 591: reward = -92.79, steps = 73\n",
      "09:22:43 [DEBUG] evaluate generation 592: reward = -93.00, steps = 72\n",
      "09:22:44 [DEBUG] evaluate generation 593: reward = -92.71, steps = 73\n",
      "09:22:45 [DEBUG] evaluate generation 594: reward = -93.02, steps = 71\n",
      "09:22:46 [DEBUG] evaluate generation 595: reward = -92.94, steps = 74\n",
      "09:22:47 [DEBUG] evaluate generation 596: reward = -92.60, steps = 77\n",
      "09:22:48 [DEBUG] evaluate generation 597: reward = -92.46, steps = 80\n",
      "09:22:50 [DEBUG] evaluate generation 598: reward = -92.82, steps = 79\n",
      "09:22:53 [DEBUG] evaluate generation 599: reward = -93.14, steps = 73\n",
      "09:22:54 [DEBUG] evaluate generation 600: reward = -92.80, steps = 80\n",
      "09:22:56 [DEBUG] evaluate generation 601: reward = -92.48, steps = 84\n",
      "09:22:57 [DEBUG] evaluate generation 602: reward = -92.63, steps = 78\n",
      "09:22:59 [DEBUG] evaluate generation 603: reward = -92.36, steps = 80\n",
      "09:23:01 [DEBUG] evaluate generation 604: reward = -91.66, steps = 79\n",
      "09:23:03 [DEBUG] evaluate generation 605: reward = -92.22, steps = 88\n",
      "09:23:06 [DEBUG] evaluate generation 606: reward = -92.31, steps = 82\n",
      "09:23:09 [DEBUG] evaluate generation 607: reward = -92.61, steps = 76\n",
      "09:23:12 [DEBUG] evaluate generation 608: reward = -92.96, steps = 74\n",
      "09:23:13 [DEBUG] evaluate generation 609: reward = -93.04, steps = 74\n",
      "09:23:16 [DEBUG] evaluate generation 610: reward = -93.25, steps = 71\n",
      "09:23:18 [DEBUG] evaluate generation 611: reward = -93.65, steps = 68\n",
      "09:23:20 [DEBUG] evaluate generation 612: reward = -93.63, steps = 68\n",
      "09:23:21 [DEBUG] evaluate generation 613: reward = -93.90, steps = 68\n",
      "09:23:22 [DEBUG] evaluate generation 614: reward = -94.02, steps = 68\n",
      "09:23:25 [DEBUG] evaluate generation 615: reward = -94.22, steps = 66\n",
      "09:23:26 [DEBUG] evaluate generation 616: reward = -94.18, steps = 67\n",
      "09:23:28 [DEBUG] evaluate generation 617: reward = -94.21, steps = 68\n",
      "09:23:29 [DEBUG] evaluate generation 618: reward = -94.37, steps = 66\n",
      "09:23:31 [DEBUG] evaluate generation 619: reward = -94.13, steps = 68\n",
      "09:23:33 [DEBUG] evaluate generation 620: reward = -94.52, steps = 66\n",
      "09:23:34 [DEBUG] evaluate generation 621: reward = -94.34, steps = 66\n",
      "09:23:37 [DEBUG] evaluate generation 622: reward = -94.51, steps = 66\n",
      "09:23:40 [DEBUG] evaluate generation 623: reward = -94.23, steps = 68\n",
      "09:23:42 [DEBUG] evaluate generation 624: reward = -94.56, steps = 66\n",
      "09:23:43 [DEBUG] evaluate generation 625: reward = -94.23, steps = 67\n",
      "09:23:45 [DEBUG] evaluate generation 626: reward = -94.51, steps = 66\n",
      "09:23:47 [DEBUG] evaluate generation 627: reward = -94.42, steps = 66\n",
      "09:23:49 [DEBUG] evaluate generation 628: reward = -94.58, steps = 66\n",
      "09:23:51 [DEBUG] evaluate generation 629: reward = -94.27, steps = 67\n",
      "09:23:52 [DEBUG] evaluate generation 630: reward = -94.08, steps = 67\n",
      "09:23:53 [DEBUG] evaluate generation 631: reward = -94.36, steps = 66\n",
      "09:23:54 [DEBUG] evaluate generation 632: reward = -94.43, steps = 67\n",
      "09:23:56 [DEBUG] evaluate generation 633: reward = -94.47, steps = 65\n",
      "09:23:58 [DEBUG] evaluate generation 634: reward = -94.30, steps = 67\n",
      "09:24:02 [DEBUG] evaluate generation 635: reward = -94.21, steps = 67\n",
      "09:24:05 [DEBUG] evaluate generation 636: reward = -94.30, steps = 66\n",
      "09:24:05 [DEBUG] evaluate generation 637: reward = -94.58, steps = 66\n",
      "09:24:06 [DEBUG] evaluate generation 638: reward = -94.01, steps = 67\n",
      "09:24:08 [DEBUG] evaluate generation 639: reward = -94.38, steps = 66\n",
      "09:24:09 [DEBUG] evaluate generation 640: reward = -94.38, steps = 67\n",
      "09:24:10 [DEBUG] evaluate generation 641: reward = -94.45, steps = 65\n",
      "09:24:11 [DEBUG] evaluate generation 642: reward = -94.22, steps = 66\n",
      "09:24:13 [DEBUG] evaluate generation 643: reward = -94.31, steps = 66\n",
      "09:24:15 [DEBUG] evaluate generation 644: reward = -94.54, steps = 65\n",
      "09:24:15 [DEBUG] evaluate generation 645: reward = -93.59, steps = 68\n",
      "09:24:16 [DEBUG] evaluate generation 646: reward = -93.21, steps = 69\n",
      "09:24:18 [DEBUG] evaluate generation 647: reward = -94.44, steps = 66\n",
      "09:24:22 [DEBUG] evaluate generation 648: reward = -94.42, steps = 67\n",
      "09:24:23 [DEBUG] evaluate generation 649: reward = -93.87, steps = 68\n",
      "09:24:25 [DEBUG] evaluate generation 650: reward = -94.11, steps = 67\n",
      "09:24:26 [DEBUG] evaluate generation 651: reward = -94.25, steps = 67\n",
      "09:24:29 [DEBUG] evaluate generation 652: reward = -93.59, steps = 68\n",
      "09:24:31 [DEBUG] evaluate generation 653: reward = -94.20, steps = 66\n",
      "09:24:32 [DEBUG] evaluate generation 654: reward = -93.65, steps = 69\n",
      "09:24:35 [DEBUG] evaluate generation 655: reward = -92.86, steps = 73\n",
      "09:24:36 [DEBUG] evaluate generation 656: reward = -93.61, steps = 68\n",
      "09:24:38 [DEBUG] evaluate generation 657: reward = -94.18, steps = 67\n",
      "09:24:39 [DEBUG] evaluate generation 658: reward = -92.82, steps = 79\n",
      "09:24:40 [DEBUG] evaluate generation 659: reward = -93.34, steps = 69\n",
      "09:24:43 [DEBUG] evaluate generation 660: reward = -92.57, steps = 70\n",
      "09:24:45 [DEBUG] evaluate generation 661: reward = -92.66, steps = 75\n",
      "09:24:46 [DEBUG] evaluate generation 662: reward = -92.12, steps = 81\n",
      "09:24:48 [DEBUG] evaluate generation 663: reward = -92.58, steps = 71\n",
      "09:24:49 [DEBUG] evaluate generation 664: reward = -92.48, steps = 76\n",
      "09:24:50 [DEBUG] evaluate generation 665: reward = -92.47, steps = 74\n",
      "09:24:51 [DEBUG] evaluate generation 666: reward = -91.94, steps = 83\n",
      "09:24:54 [DEBUG] evaluate generation 667: reward = -92.40, steps = 81\n",
      "09:24:57 [DEBUG] evaluate generation 668: reward = -93.67, steps = 71\n",
      "09:24:59 [DEBUG] evaluate generation 669: reward = -92.46, steps = 78\n",
      "09:25:03 [DEBUG] evaluate generation 670: reward = -92.47, steps = 76\n",
      "09:25:05 [DEBUG] evaluate generation 671: reward = -92.78, steps = 73\n",
      "09:25:07 [DEBUG] evaluate generation 672: reward = -93.07, steps = 70\n",
      "09:25:08 [DEBUG] evaluate generation 673: reward = -92.11, steps = 84\n",
      "09:25:09 [DEBUG] evaluate generation 674: reward = -92.46, steps = 78\n",
      "09:25:12 [DEBUG] evaluate generation 675: reward = -93.22, steps = 69\n",
      "09:25:15 [DEBUG] evaluate generation 676: reward = -93.08, steps = 71\n",
      "09:25:18 [DEBUG] evaluate generation 677: reward = -94.07, steps = 65\n",
      "09:25:21 [DEBUG] evaluate generation 678: reward = -94.18, steps = 67\n",
      "09:25:23 [DEBUG] evaluate generation 679: reward = -94.10, steps = 68\n",
      "09:25:25 [DEBUG] evaluate generation 680: reward = -94.32, steps = 69\n",
      "09:25:28 [DEBUG] evaluate generation 681: reward = -94.57, steps = 65\n",
      "09:25:29 [DEBUG] evaluate generation 682: reward = -94.20, steps = 67\n",
      "09:25:31 [DEBUG] evaluate generation 683: reward = -94.23, steps = 68\n",
      "09:25:33 [DEBUG] evaluate generation 684: reward = -94.51, steps = 65\n",
      "09:25:34 [DEBUG] evaluate generation 685: reward = -94.58, steps = 66\n",
      "09:25:35 [DEBUG] evaluate generation 686: reward = -93.94, steps = 70\n",
      "09:25:36 [DEBUG] evaluate generation 687: reward = -94.50, steps = 66\n",
      "09:25:37 [DEBUG] evaluate generation 688: reward = -94.07, steps = 67\n",
      "09:25:38 [DEBUG] evaluate generation 689: reward = -94.57, steps = 65\n",
      "09:25:40 [DEBUG] evaluate generation 690: reward = -94.56, steps = 65\n",
      "09:25:42 [DEBUG] evaluate generation 691: reward = -94.31, steps = 66\n",
      "09:25:44 [DEBUG] evaluate generation 692: reward = -94.13, steps = 66\n",
      "09:25:45 [DEBUG] evaluate generation 693: reward = -93.87, steps = 70\n",
      "09:25:46 [DEBUG] evaluate generation 694: reward = -93.89, steps = 70\n",
      "09:25:47 [DEBUG] evaluate generation 695: reward = -93.65, steps = 70\n",
      "09:25:48 [DEBUG] evaluate generation 696: reward = -93.89, steps = 68\n",
      "09:25:49 [DEBUG] evaluate generation 697: reward = -93.81, steps = 72\n",
      "09:25:52 [DEBUG] evaluate generation 698: reward = -93.70, steps = 70\n",
      "09:25:53 [DEBUG] evaluate generation 699: reward = -93.65, steps = 71\n",
      "09:25:55 [DEBUG] evaluate generation 700: reward = -93.97, steps = 68\n",
      "09:25:57 [DEBUG] evaluate generation 701: reward = -94.42, steps = 65\n",
      "09:25:59 [DEBUG] evaluate generation 702: reward = -94.45, steps = 65\n",
      "09:26:01 [DEBUG] evaluate generation 703: reward = -94.99, steps = 65\n",
      "09:26:04 [DEBUG] evaluate generation 704: reward = -94.73, steps = 66\n",
      "09:26:06 [DEBUG] evaluate generation 705: reward = -94.73, steps = 66\n",
      "09:26:07 [DEBUG] evaluate generation 706: reward = -94.65, steps = 66\n",
      "09:26:08 [DEBUG] evaluate generation 707: reward = -94.67, steps = 67\n",
      "09:26:09 [DEBUG] evaluate generation 708: reward = -94.50, steps = 66\n",
      "09:26:10 [DEBUG] evaluate generation 709: reward = -94.39, steps = 66\n",
      "09:26:11 [DEBUG] evaluate generation 710: reward = -94.32, steps = 67\n",
      "09:26:12 [DEBUG] evaluate generation 711: reward = -93.68, steps = 68\n",
      "09:26:13 [DEBUG] evaluate generation 712: reward = -94.55, steps = 65\n",
      "09:26:14 [DEBUG] evaluate generation 713: reward = -91.97, steps = 76\n",
      "09:26:16 [DEBUG] evaluate generation 714: reward = -92.54, steps = 73\n",
      "09:26:17 [DEBUG] evaluate generation 715: reward = -92.33, steps = 74\n",
      "09:26:19 [DEBUG] evaluate generation 716: reward = -92.65, steps = 75\n",
      "09:26:20 [DEBUG] evaluate generation 717: reward = -93.61, steps = 68\n",
      "09:26:22 [DEBUG] evaluate generation 718: reward = -92.20, steps = 77\n",
      "09:26:25 [DEBUG] evaluate generation 719: reward = -93.44, steps = 67\n",
      "09:26:26 [DEBUG] evaluate generation 720: reward = -94.03, steps = 67\n",
      "09:26:30 [DEBUG] evaluate generation 721: reward = -94.29, steps = 66\n",
      "09:26:32 [DEBUG] evaluate generation 722: reward = -94.15, steps = 66\n",
      "09:26:33 [DEBUG] evaluate generation 723: reward = -94.24, steps = 66\n",
      "09:26:35 [DEBUG] evaluate generation 724: reward = -94.36, steps = 67\n",
      "09:26:36 [DEBUG] evaluate generation 725: reward = -93.76, steps = 68\n",
      "09:26:38 [DEBUG] evaluate generation 726: reward = -94.07, steps = 67\n",
      "09:26:40 [DEBUG] evaluate generation 727: reward = -94.56, steps = 67\n",
      "09:26:42 [DEBUG] evaluate generation 728: reward = -93.96, steps = 68\n",
      "09:26:43 [DEBUG] evaluate generation 729: reward = -94.28, steps = 67\n",
      "09:26:45 [DEBUG] evaluate generation 730: reward = -93.63, steps = 71\n",
      "09:26:47 [DEBUG] evaluate generation 731: reward = -93.76, steps = 69\n",
      "09:26:49 [DEBUG] evaluate generation 732: reward = -93.86, steps = 68\n",
      "09:26:52 [DEBUG] evaluate generation 733: reward = -94.03, steps = 66\n",
      "09:26:54 [DEBUG] evaluate generation 734: reward = -94.11, steps = 68\n",
      "09:26:56 [DEBUG] evaluate generation 735: reward = -94.37, steps = 66\n",
      "09:26:57 [DEBUG] evaluate generation 736: reward = -94.90, steps = 64\n",
      "09:26:58 [DEBUG] evaluate generation 737: reward = -93.98, steps = 68\n",
      "09:26:59 [DEBUG] evaluate generation 738: reward = -93.91, steps = 69\n",
      "09:27:00 [DEBUG] evaluate generation 739: reward = -93.57, steps = 68\n",
      "09:27:01 [DEBUG] evaluate generation 740: reward = -93.86, steps = 69\n",
      "09:27:04 [DEBUG] evaluate generation 741: reward = -93.77, steps = 70\n",
      "09:27:05 [DEBUG] evaluate generation 742: reward = -93.47, steps = 69\n",
      "09:27:07 [DEBUG] evaluate generation 743: reward = -93.04, steps = 70\n",
      "09:27:09 [DEBUG] evaluate generation 744: reward = -93.45, steps = 69\n",
      "09:27:11 [DEBUG] evaluate generation 745: reward = -93.70, steps = 69\n",
      "09:27:12 [DEBUG] evaluate generation 746: reward = -93.55, steps = 76\n",
      "09:27:14 [DEBUG] evaluate generation 747: reward = -94.50, steps = 65\n",
      "09:27:14 [DEBUG] evaluate generation 748: reward = -93.98, steps = 68\n",
      "09:27:16 [DEBUG] evaluate generation 749: reward = -94.40, steps = 67\n",
      "09:27:18 [DEBUG] evaluate generation 750: reward = -93.76, steps = 69\n",
      "09:27:19 [DEBUG] evaluate generation 751: reward = -93.70, steps = 71\n",
      "09:27:21 [DEBUG] evaluate generation 752: reward = -93.63, steps = 69\n",
      "09:27:23 [DEBUG] evaluate generation 753: reward = -94.63, steps = 67\n",
      "09:27:24 [DEBUG] evaluate generation 754: reward = -92.82, steps = 73\n",
      "09:27:24 [DEBUG] evaluate generation 755: reward = -92.65, steps = 72\n",
      "09:27:26 [DEBUG] evaluate generation 756: reward = -92.75, steps = 72\n",
      "09:27:28 [DEBUG] evaluate generation 757: reward = -92.90, steps = 71\n",
      "09:27:30 [DEBUG] evaluate generation 758: reward = -93.01, steps = 72\n",
      "09:27:32 [DEBUG] evaluate generation 759: reward = -94.43, steps = 68\n",
      "09:27:34 [DEBUG] evaluate generation 760: reward = -92.80, steps = 77\n",
      "09:27:35 [DEBUG] evaluate generation 761: reward = -94.29, steps = 68\n",
      "09:27:38 [DEBUG] evaluate generation 762: reward = -94.89, steps = 66\n",
      "09:27:39 [DEBUG] evaluate generation 763: reward = -92.98, steps = 72\n",
      "09:27:40 [DEBUG] evaluate generation 764: reward = -93.96, steps = 68\n",
      "09:27:41 [DEBUG] evaluate generation 765: reward = -92.77, steps = 71\n",
      "09:27:42 [DEBUG] evaluate generation 766: reward = -92.17, steps = 80\n",
      "09:27:44 [DEBUG] evaluate generation 767: reward = -92.51, steps = 78\n",
      "09:27:46 [DEBUG] evaluate generation 768: reward = -93.20, steps = 71\n",
      "09:27:48 [DEBUG] evaluate generation 769: reward = -92.68, steps = 78\n",
      "09:27:50 [DEBUG] evaluate generation 770: reward = -93.25, steps = 77\n",
      "09:27:51 [DEBUG] evaluate generation 771: reward = -92.46, steps = 80\n",
      "09:27:53 [DEBUG] evaluate generation 772: reward = -94.64, steps = 68\n",
      "09:27:54 [DEBUG] evaluate generation 773: reward = -94.45, steps = 66\n",
      "09:27:55 [DEBUG] evaluate generation 774: reward = -94.38, steps = 71\n",
      "09:27:58 [DEBUG] evaluate generation 775: reward = -93.82, steps = 70\n",
      "09:27:59 [DEBUG] evaluate generation 776: reward = -94.43, steps = 67\n",
      "09:28:00 [DEBUG] evaluate generation 777: reward = -92.58, steps = 73\n",
      "09:28:01 [DEBUG] evaluate generation 778: reward = -92.39, steps = 73\n",
      "09:28:02 [DEBUG] evaluate generation 779: reward = -92.16, steps = 74\n",
      "09:28:04 [DEBUG] evaluate generation 780: reward = -92.44, steps = 72\n",
      "09:28:06 [DEBUG] evaluate generation 781: reward = -93.71, steps = 68\n",
      "09:28:07 [DEBUG] evaluate generation 782: reward = -93.17, steps = 70\n",
      "09:28:09 [DEBUG] evaluate generation 783: reward = -94.25, steps = 65\n",
      "09:28:10 [DEBUG] evaluate generation 784: reward = -94.45, steps = 66\n",
      "09:28:11 [DEBUG] evaluate generation 785: reward = -92.98, steps = 70\n",
      "09:28:15 [DEBUG] evaluate generation 786: reward = -93.09, steps = 70\n",
      "09:28:17 [DEBUG] evaluate generation 787: reward = -93.54, steps = 69\n",
      "09:28:19 [DEBUG] evaluate generation 788: reward = -94.08, steps = 66\n",
      "09:28:20 [DEBUG] evaluate generation 789: reward = -93.76, steps = 66\n",
      "09:28:20 [DEBUG] evaluate generation 790: reward = -93.42, steps = 69\n",
      "09:28:21 [DEBUG] evaluate generation 791: reward = -93.35, steps = 68\n",
      "09:28:23 [DEBUG] evaluate generation 792: reward = -93.03, steps = 70\n",
      "09:28:25 [DEBUG] evaluate generation 793: reward = -93.34, steps = 68\n",
      "09:28:29 [DEBUG] evaluate generation 794: reward = -94.10, steps = 67\n",
      "09:28:29 [DEBUG] evaluate generation 795: reward = -94.39, steps = 65\n",
      "09:28:31 [DEBUG] evaluate generation 796: reward = -94.20, steps = 65\n",
      "09:28:32 [DEBUG] evaluate generation 797: reward = -93.75, steps = 68\n",
      "09:28:33 [DEBUG] evaluate generation 798: reward = -94.36, steps = 65\n",
      "09:28:35 [DEBUG] evaluate generation 799: reward = -93.22, steps = 69\n",
      "09:28:37 [DEBUG] evaluate generation 800: reward = -93.32, steps = 71\n",
      "09:28:38 [DEBUG] evaluate generation 801: reward = -92.80, steps = 73\n",
      "09:28:39 [DEBUG] evaluate generation 802: reward = -94.25, steps = 68\n",
      "09:28:41 [DEBUG] evaluate generation 803: reward = -94.28, steps = 68\n",
      "09:28:42 [DEBUG] evaluate generation 804: reward = -92.78, steps = 73\n",
      "09:28:43 [DEBUG] evaluate generation 805: reward = -92.71, steps = 73\n",
      "09:28:44 [DEBUG] evaluate generation 806: reward = -93.98, steps = 68\n",
      "09:28:47 [DEBUG] evaluate generation 807: reward = -93.78, steps = 69\n",
      "09:28:48 [DEBUG] evaluate generation 808: reward = -92.71, steps = 72\n",
      "09:28:50 [DEBUG] evaluate generation 809: reward = -93.26, steps = 71\n",
      "09:28:53 [DEBUG] evaluate generation 810: reward = -93.23, steps = 71\n",
      "09:28:54 [DEBUG] evaluate generation 811: reward = -93.07, steps = 71\n",
      "09:28:55 [DEBUG] evaluate generation 812: reward = -93.65, steps = 69\n",
      "09:28:56 [DEBUG] evaluate generation 813: reward = -93.46, steps = 68\n",
      "09:28:57 [DEBUG] evaluate generation 814: reward = -93.94, steps = 67\n",
      "09:28:58 [DEBUG] evaluate generation 815: reward = -93.70, steps = 68\n",
      "09:28:59 [DEBUG] evaluate generation 816: reward = -94.06, steps = 69\n",
      "09:29:02 [DEBUG] evaluate generation 817: reward = -94.44, steps = 67\n",
      "09:29:03 [DEBUG] evaluate generation 818: reward = -93.53, steps = 70\n",
      "09:29:05 [DEBUG] evaluate generation 819: reward = -93.80, steps = 69\n",
      "09:29:06 [DEBUG] evaluate generation 820: reward = -94.52, steps = 68\n",
      "09:29:07 [DEBUG] evaluate generation 821: reward = -94.04, steps = 69\n",
      "09:29:08 [DEBUG] evaluate generation 822: reward = -93.31, steps = 71\n",
      "09:29:09 [DEBUG] evaluate generation 823: reward = -93.90, steps = 69\n",
      "09:29:10 [DEBUG] evaluate generation 824: reward = -93.85, steps = 69\n",
      "09:29:11 [DEBUG] evaluate generation 825: reward = -93.33, steps = 72\n",
      "09:29:13 [DEBUG] evaluate generation 826: reward = -93.78, steps = 70\n",
      "09:29:15 [DEBUG] evaluate generation 827: reward = -93.55, steps = 69\n",
      "09:29:16 [DEBUG] evaluate generation 828: reward = -94.17, steps = 68\n",
      "09:29:17 [DEBUG] evaluate generation 829: reward = -93.08, steps = 71\n",
      "09:29:19 [DEBUG] evaluate generation 830: reward = -92.63, steps = 74\n",
      "09:29:20 [DEBUG] evaluate generation 831: reward = -92.65, steps = 73\n",
      "09:29:21 [DEBUG] evaluate generation 832: reward = -92.52, steps = 76\n",
      "09:29:22 [DEBUG] evaluate generation 833: reward = -93.00, steps = 73\n",
      "09:29:26 [DEBUG] evaluate generation 834: reward = -93.72, steps = 71\n",
      "09:29:28 [DEBUG] evaluate generation 835: reward = -94.07, steps = 70\n",
      "09:29:30 [DEBUG] evaluate generation 836: reward = -92.75, steps = 74\n",
      "09:29:31 [DEBUG] evaluate generation 837: reward = -92.54, steps = 75\n",
      "09:29:33 [DEBUG] evaluate generation 838: reward = -93.51, steps = 70\n",
      "09:29:34 [DEBUG] evaluate generation 839: reward = -93.99, steps = 70\n",
      "09:29:35 [DEBUG] evaluate generation 840: reward = -92.41, steps = 77\n",
      "09:29:36 [DEBUG] evaluate generation 841: reward = -92.58, steps = 74\n",
      "09:29:37 [DEBUG] evaluate generation 842: reward = -92.70, steps = 78\n",
      "09:29:39 [DEBUG] evaluate generation 843: reward = -92.45, steps = 80\n",
      "09:29:41 [DEBUG] evaluate generation 844: reward = -92.38, steps = 79\n",
      "09:29:42 [DEBUG] evaluate generation 845: reward = -92.15, steps = 81\n",
      "09:29:43 [DEBUG] evaluate generation 846: reward = -92.60, steps = 82\n",
      "09:29:45 [DEBUG] evaluate generation 847: reward = -92.35, steps = 81\n",
      "09:29:47 [DEBUG] evaluate generation 848: reward = -92.54, steps = 79\n",
      "09:29:48 [DEBUG] evaluate generation 849: reward = -92.83, steps = 82\n",
      "09:29:50 [DEBUG] evaluate generation 850: reward = -93.43, steps = 73\n",
      "09:29:51 [DEBUG] evaluate generation 851: reward = -92.57, steps = 78\n",
      "09:29:52 [DEBUG] evaluate generation 852: reward = -92.34, steps = 80\n",
      "09:29:52 [DEBUG] evaluate generation 853: reward = -93.81, steps = 73\n",
      "09:29:55 [DEBUG] evaluate generation 854: reward = -92.81, steps = 81\n",
      "09:29:57 [DEBUG] evaluate generation 855: reward = -92.67, steps = 80\n",
      "09:29:59 [DEBUG] evaluate generation 856: reward = -92.56, steps = 80\n",
      "09:30:00 [DEBUG] evaluate generation 857: reward = -92.23, steps = 82\n",
      "09:30:01 [DEBUG] evaluate generation 858: reward = -92.44, steps = 80\n",
      "09:30:02 [DEBUG] evaluate generation 859: reward = -91.28, steps = 84\n",
      "09:30:03 [DEBUG] evaluate generation 860: reward = -91.02, steps = 82\n",
      "09:30:06 [DEBUG] evaluate generation 861: reward = -92.10, steps = 82\n",
      "09:30:07 [DEBUG] evaluate generation 862: reward = -91.18, steps = 85\n",
      "09:30:08 [DEBUG] evaluate generation 863: reward = -91.03, steps = 84\n",
      "09:30:09 [DEBUG] evaluate generation 864: reward = -93.03, steps = 78\n",
      "09:30:12 [DEBUG] evaluate generation 865: reward = -91.06, steps = 83\n",
      "09:30:14 [DEBUG] evaluate generation 866: reward = -90.90, steps = 84\n",
      "09:30:17 [DEBUG] evaluate generation 867: reward = -91.00, steps = 83\n",
      "09:30:20 [DEBUG] evaluate generation 868: reward = -90.30, steps = 84\n",
      "09:30:21 [DEBUG] evaluate generation 869: reward = -91.20, steps = 83\n",
      "09:30:22 [DEBUG] evaluate generation 870: reward = -91.46, steps = 83\n",
      "09:30:25 [DEBUG] evaluate generation 871: reward = -90.65, steps = 83\n",
      "09:30:26 [DEBUG] evaluate generation 872: reward = -91.15, steps = 83\n",
      "09:30:28 [DEBUG] evaluate generation 873: reward = -93.05, steps = 77\n",
      "09:30:30 [DEBUG] evaluate generation 874: reward = -92.72, steps = 80\n",
      "09:30:31 [DEBUG] evaluate generation 875: reward = -92.95, steps = 76\n",
      "09:30:32 [DEBUG] evaluate generation 876: reward = -91.45, steps = 84\n",
      "09:30:34 [DEBUG] evaluate generation 877: reward = -92.57, steps = 83\n",
      "09:30:35 [DEBUG] evaluate generation 878: reward = -91.54, steps = 84\n",
      "09:30:37 [DEBUG] evaluate generation 879: reward = -91.87, steps = 83\n",
      "09:30:38 [DEBUG] evaluate generation 880: reward = -92.90, steps = 80\n",
      "09:30:39 [DEBUG] evaluate generation 881: reward = -93.29, steps = 74\n",
      "09:30:41 [DEBUG] evaluate generation 882: reward = -92.58, steps = 80\n",
      "09:30:42 [DEBUG] evaluate generation 883: reward = -92.13, steps = 81\n",
      "09:30:43 [DEBUG] evaluate generation 884: reward = -92.20, steps = 82\n",
      "09:30:44 [DEBUG] evaluate generation 885: reward = -91.80, steps = 83\n",
      "09:30:45 [DEBUG] evaluate generation 886: reward = -91.53, steps = 81\n",
      "09:30:46 [DEBUG] evaluate generation 887: reward = -90.95, steps = 82\n",
      "09:30:47 [DEBUG] evaluate generation 888: reward = -93.25, steps = 80\n",
      "09:30:50 [DEBUG] evaluate generation 889: reward = -92.69, steps = 81\n",
      "09:30:51 [DEBUG] evaluate generation 890: reward = -93.14, steps = 79\n",
      "09:30:54 [DEBUG] evaluate generation 891: reward = -90.66, steps = 86\n",
      "09:30:56 [DEBUG] evaluate generation 892: reward = -90.49, steps = 84\n",
      "09:30:58 [DEBUG] evaluate generation 893: reward = -91.16, steps = 84\n",
      "09:30:59 [DEBUG] evaluate generation 894: reward = -90.27, steps = 88\n",
      "09:31:01 [DEBUG] evaluate generation 895: reward = -90.90, steps = 90\n",
      "09:31:03 [DEBUG] evaluate generation 896: reward = -90.51, steps = 92\n",
      "09:31:04 [DEBUG] evaluate generation 897: reward = -90.48, steps = 89\n",
      "09:31:05 [DEBUG] evaluate generation 898: reward = -90.50, steps = 94\n",
      "09:31:08 [DEBUG] evaluate generation 899: reward = -90.22, steps = 91\n",
      "09:31:10 [DEBUG] evaluate generation 900: reward = -90.82, steps = 88\n",
      "09:31:14 [DEBUG] evaluate generation 901: reward = -90.87, steps = 90\n",
      "09:31:17 [DEBUG] evaluate generation 902: reward = -90.47, steps = 90\n",
      "09:31:18 [DEBUG] evaluate generation 903: reward = -90.35, steps = 96\n",
      "09:31:21 [DEBUG] evaluate generation 904: reward = -92.27, steps = 92\n",
      "09:31:23 [DEBUG] evaluate generation 905: reward = -90.53, steps = 90\n",
      "09:31:24 [DEBUG] evaluate generation 906: reward = -90.30, steps = 92\n",
      "09:31:26 [DEBUG] evaluate generation 907: reward = -90.64, steps = 91\n",
      "09:31:27 [DEBUG] evaluate generation 908: reward = -90.44, steps = 91\n",
      "09:31:30 [DEBUG] evaluate generation 909: reward = -90.03, steps = 94\n",
      "09:31:32 [DEBUG] evaluate generation 910: reward = -90.74, steps = 88\n",
      "09:31:35 [DEBUG] evaluate generation 911: reward = -90.46, steps = 90\n",
      "09:31:38 [DEBUG] evaluate generation 912: reward = -90.53, steps = 88\n",
      "09:31:38 [DEBUG] evaluate generation 913: reward = -91.09, steps = 88\n",
      "09:31:40 [DEBUG] evaluate generation 914: reward = -90.27, steps = 90\n",
      "09:31:43 [DEBUG] evaluate generation 915: reward = -90.42, steps = 90\n",
      "09:31:44 [DEBUG] evaluate generation 916: reward = -91.08, steps = 87\n",
      "09:31:45 [DEBUG] evaluate generation 917: reward = -90.31, steps = 91\n",
      "09:31:47 [DEBUG] evaluate generation 918: reward = -89.59, steps = 95\n",
      "09:31:49 [DEBUG] evaluate generation 919: reward = -90.73, steps = 92\n",
      "09:31:51 [DEBUG] evaluate generation 920: reward = -90.49, steps = 91\n",
      "09:31:52 [DEBUG] evaluate generation 921: reward = -90.34, steps = 91\n",
      "09:31:54 [DEBUG] evaluate generation 922: reward = -91.23, steps = 87\n",
      "09:31:56 [DEBUG] evaluate generation 923: reward = -90.08, steps = 92\n",
      "09:31:57 [DEBUG] evaluate generation 924: reward = -90.52, steps = 93\n",
      "09:31:59 [DEBUG] evaluate generation 925: reward = -90.44, steps = 95\n",
      "09:32:00 [DEBUG] evaluate generation 926: reward = -90.15, steps = 91\n",
      "09:32:02 [DEBUG] evaluate generation 927: reward = -92.59, steps = 83\n",
      "09:32:04 [DEBUG] evaluate generation 928: reward = -90.62, steps = 94\n",
      "09:32:05 [DEBUG] evaluate generation 929: reward = -90.97, steps = 91\n",
      "09:32:09 [DEBUG] evaluate generation 930: reward = -90.38, steps = 92\n",
      "09:32:10 [DEBUG] evaluate generation 931: reward = -90.44, steps = 92\n",
      "09:32:12 [DEBUG] evaluate generation 932: reward = -90.39, steps = 95\n",
      "09:32:13 [DEBUG] evaluate generation 933: reward = -90.74, steps = 89\n",
      "09:32:14 [DEBUG] evaluate generation 934: reward = -92.98, steps = 82\n",
      "09:32:17 [DEBUG] evaluate generation 935: reward = -90.81, steps = 91\n",
      "09:32:19 [DEBUG] evaluate generation 936: reward = -93.57, steps = 82\n",
      "09:32:20 [DEBUG] evaluate generation 937: reward = -91.11, steps = 86\n",
      "09:32:22 [DEBUG] evaluate generation 938: reward = -91.15, steps = 87\n",
      "09:32:23 [DEBUG] evaluate generation 939: reward = -90.31, steps = 92\n",
      "09:32:26 [DEBUG] evaluate generation 940: reward = -90.19, steps = 92\n",
      "09:32:28 [DEBUG] evaluate generation 941: reward = -93.02, steps = 76\n",
      "09:32:29 [DEBUG] evaluate generation 942: reward = -90.27, steps = 92\n",
      "09:32:30 [DEBUG] evaluate generation 943: reward = -91.32, steps = 84\n",
      "09:32:31 [DEBUG] evaluate generation 944: reward = -90.45, steps = 93\n",
      "09:32:33 [DEBUG] evaluate generation 945: reward = -90.55, steps = 88\n",
      "09:32:36 [DEBUG] evaluate generation 946: reward = -90.82, steps = 86\n",
      "09:32:37 [DEBUG] evaluate generation 947: reward = -91.72, steps = 83\n",
      "09:32:39 [DEBUG] evaluate generation 948: reward = -90.88, steps = 85\n",
      "09:32:41 [DEBUG] evaluate generation 949: reward = -91.45, steps = 86\n",
      "09:32:42 [DEBUG] evaluate generation 950: reward = -90.86, steps = 87\n",
      "09:32:45 [DEBUG] evaluate generation 951: reward = -90.80, steps = 88\n",
      "09:32:46 [DEBUG] evaluate generation 952: reward = -91.35, steps = 84\n",
      "09:32:47 [DEBUG] evaluate generation 953: reward = -90.88, steps = 91\n",
      "09:32:48 [DEBUG] evaluate generation 954: reward = -90.55, steps = 92\n",
      "09:32:50 [DEBUG] evaluate generation 955: reward = -91.02, steps = 88\n",
      "09:32:51 [DEBUG] evaluate generation 956: reward = -90.17, steps = 95\n",
      "09:32:55 [DEBUG] evaluate generation 957: reward = -91.19, steps = 87\n",
      "09:32:57 [DEBUG] evaluate generation 958: reward = -90.05, steps = 92\n",
      "09:32:58 [DEBUG] evaluate generation 959: reward = -90.31, steps = 92\n",
      "09:33:00 [DEBUG] evaluate generation 960: reward = -90.47, steps = 92\n",
      "09:33:04 [DEBUG] evaluate generation 961: reward = -90.27, steps = 90\n",
      "09:33:06 [DEBUG] evaluate generation 962: reward = -90.65, steps = 88\n",
      "09:33:07 [DEBUG] evaluate generation 963: reward = -90.73, steps = 89\n",
      "09:33:09 [DEBUG] evaluate generation 964: reward = -90.71, steps = 89\n",
      "09:33:10 [DEBUG] evaluate generation 965: reward = -90.35, steps = 91\n",
      "09:33:12 [DEBUG] evaluate generation 966: reward = -89.89, steps = 90\n",
      "09:33:14 [DEBUG] evaluate generation 967: reward = -90.64, steps = 88\n",
      "09:33:16 [DEBUG] evaluate generation 968: reward = -90.58, steps = 86\n",
      "09:33:17 [DEBUG] evaluate generation 969: reward = -91.24, steps = 86\n",
      "09:33:18 [DEBUG] evaluate generation 970: reward = -90.25, steps = 88\n",
      "09:33:20 [DEBUG] evaluate generation 971: reward = -91.43, steps = 83\n",
      "09:33:21 [DEBUG] evaluate generation 972: reward = -90.31, steps = 88\n",
      "09:33:23 [DEBUG] evaluate generation 973: reward = -90.44, steps = 87\n",
      "09:33:25 [DEBUG] evaluate generation 974: reward = -91.07, steps = 85\n",
      "09:33:26 [DEBUG] evaluate generation 975: reward = -90.60, steps = 86\n",
      "09:33:27 [DEBUG] evaluate generation 976: reward = -90.49, steps = 88\n",
      "09:33:28 [DEBUG] evaluate generation 977: reward = -90.96, steps = 86\n",
      "09:33:30 [DEBUG] evaluate generation 978: reward = -91.14, steps = 86\n",
      "09:33:31 [DEBUG] evaluate generation 979: reward = -90.44, steps = 87\n",
      "09:33:32 [DEBUG] evaluate generation 980: reward = -91.16, steps = 89\n",
      "09:33:33 [DEBUG] evaluate generation 981: reward = -90.68, steps = 90\n",
      "09:33:35 [DEBUG] evaluate generation 982: reward = -90.92, steps = 91\n",
      "09:33:37 [DEBUG] evaluate generation 983: reward = -90.83, steps = 87\n",
      "09:33:38 [DEBUG] evaluate generation 984: reward = -90.17, steps = 92\n",
      "09:33:40 [DEBUG] evaluate generation 985: reward = -90.66, steps = 89\n",
      "09:33:41 [DEBUG] evaluate generation 986: reward = -90.66, steps = 89\n",
      "09:33:44 [DEBUG] evaluate generation 987: reward = -90.94, steps = 89\n",
      "09:33:47 [DEBUG] evaluate generation 988: reward = -91.44, steps = 88\n",
      "09:33:50 [DEBUG] evaluate generation 989: reward = -90.53, steps = 89\n",
      "09:33:51 [DEBUG] evaluate generation 990: reward = -90.65, steps = 91\n",
      "09:33:52 [DEBUG] evaluate generation 991: reward = -90.46, steps = 94\n",
      "09:33:53 [DEBUG] evaluate generation 992: reward = -90.30, steps = 96\n",
      "09:33:54 [DEBUG] evaluate generation 993: reward = -90.24, steps = 93\n",
      "09:33:56 [DEBUG] evaluate generation 994: reward = -90.74, steps = 93\n",
      "09:33:57 [DEBUG] evaluate generation 995: reward = -91.21, steps = 88\n",
      "09:33:59 [DEBUG] evaluate generation 996: reward = -90.79, steps = 92\n",
      "09:34:00 [DEBUG] evaluate generation 997: reward = -91.24, steps = 90\n",
      "09:34:02 [DEBUG] evaluate generation 998: reward = -90.48, steps = 100\n",
      "09:34:04 [DEBUG] evaluate generation 999: reward = -90.21, steps = 104\n",
      "09:34:05 [DEBUG] evaluate generation 1000: reward = -90.76, steps = 100\n",
      "09:34:08 [DEBUG] evaluate generation 1001: reward = -90.97, steps = 97\n",
      "09:34:10 [DEBUG] evaluate generation 1002: reward = -91.54, steps = 95\n",
      "09:34:11 [DEBUG] evaluate generation 1003: reward = -90.37, steps = 98\n",
      "09:34:13 [DEBUG] evaluate generation 1004: reward = -89.93, steps = 98\n",
      "09:34:15 [DEBUG] evaluate generation 1005: reward = -91.44, steps = 89\n",
      "09:34:16 [DEBUG] evaluate generation 1006: reward = -90.98, steps = 95\n",
      "09:34:17 [DEBUG] evaluate generation 1007: reward = -88.78, steps = 109\n",
      "09:34:19 [DEBUG] evaluate generation 1008: reward = -90.75, steps = 96\n",
      "09:34:20 [DEBUG] evaluate generation 1009: reward = -91.12, steps = 97\n",
      "09:34:22 [DEBUG] evaluate generation 1010: reward = -90.92, steps = 97\n",
      "09:34:24 [DEBUG] evaluate generation 1011: reward = -91.85, steps = 91\n",
      "09:34:25 [DEBUG] evaluate generation 1012: reward = -93.06, steps = 84\n",
      "09:34:27 [DEBUG] evaluate generation 1013: reward = -88.51, steps = 114\n",
      "09:34:28 [DEBUG] evaluate generation 1014: reward = -87.00, steps = 116\n",
      "09:34:29 [DEBUG] evaluate generation 1015: reward = -90.82, steps = 94\n",
      "09:34:31 [DEBUG] evaluate generation 1016: reward = -90.93, steps = 95\n",
      "09:34:34 [DEBUG] evaluate generation 1017: reward = -90.94, steps = 94\n",
      "09:34:39 [DEBUG] evaluate generation 1018: reward = -87.54, steps = 118\n",
      "09:34:41 [DEBUG] evaluate generation 1019: reward = -87.36, steps = 115\n",
      "09:34:42 [DEBUG] evaluate generation 1020: reward = -87.53, steps = 114\n",
      "09:34:43 [DEBUG] evaluate generation 1021: reward = -87.38, steps = 114\n",
      "09:34:45 [DEBUG] evaluate generation 1022: reward = -87.74, steps = 119\n",
      "09:34:46 [DEBUG] evaluate generation 1023: reward = -87.24, steps = 119\n",
      "09:34:51 [DEBUG] evaluate generation 1024: reward = -88.99, steps = 108\n",
      "09:34:52 [DEBUG] evaluate generation 1025: reward = -87.86, steps = 112\n",
      "09:34:54 [DEBUG] evaluate generation 1026: reward = -88.34, steps = 112\n",
      "09:34:56 [DEBUG] evaluate generation 1027: reward = -88.73, steps = 110\n",
      "09:34:57 [DEBUG] evaluate generation 1028: reward = -87.48, steps = 118\n",
      "09:35:01 [DEBUG] evaluate generation 1029: reward = -87.39, steps = 117\n",
      "09:35:04 [DEBUG] evaluate generation 1030: reward = -88.80, steps = 108\n",
      "09:35:05 [DEBUG] evaluate generation 1031: reward = -87.50, steps = 118\n",
      "09:35:07 [DEBUG] evaluate generation 1032: reward = -88.50, steps = 107\n",
      "09:35:09 [DEBUG] evaluate generation 1033: reward = -87.55, steps = 121\n",
      "09:35:11 [DEBUG] evaluate generation 1034: reward = -91.59, steps = 96\n",
      "09:35:12 [DEBUG] evaluate generation 1035: reward = -87.18, steps = 117\n",
      "09:35:14 [DEBUG] evaluate generation 1036: reward = -91.27, steps = 97\n",
      "09:35:15 [DEBUG] evaluate generation 1037: reward = -87.22, steps = 123\n",
      "09:35:18 [DEBUG] evaluate generation 1038: reward = -87.47, steps = 118\n",
      "09:35:20 [DEBUG] evaluate generation 1039: reward = -86.86, steps = 118\n",
      "09:35:21 [DEBUG] evaluate generation 1040: reward = -87.07, steps = 116\n",
      "09:35:22 [DEBUG] evaluate generation 1041: reward = -91.12, steps = 94\n",
      "09:35:24 [DEBUG] evaluate generation 1042: reward = -86.91, steps = 125\n",
      "09:35:25 [DEBUG] evaluate generation 1043: reward = -86.75, steps = 137\n",
      "09:35:26 [DEBUG] evaluate generation 1044: reward = -87.26, steps = 128\n",
      "09:35:29 [DEBUG] evaluate generation 1045: reward = -92.62, steps = 93\n",
      "09:35:32 [DEBUG] evaluate generation 1046: reward = -87.07, steps = 120\n",
      "09:35:36 [DEBUG] evaluate generation 1047: reward = -88.20, steps = 108\n",
      "09:35:37 [DEBUG] evaluate generation 1048: reward = -87.15, steps = 127\n",
      "09:35:38 [DEBUG] evaluate generation 1049: reward = -87.40, steps = 120\n",
      "09:35:40 [DEBUG] evaluate generation 1050: reward = -91.01, steps = 98\n",
      "09:35:43 [DEBUG] evaluate generation 1051: reward = -86.60, steps = 119\n",
      "09:35:44 [DEBUG] evaluate generation 1052: reward = -86.54, steps = 130\n",
      "09:35:48 [DEBUG] evaluate generation 1053: reward = -87.25, steps = 120\n",
      "09:35:50 [DEBUG] evaluate generation 1054: reward = -86.65, steps = 125\n",
      "09:35:52 [DEBUG] evaluate generation 1055: reward = -90.71, steps = 97\n",
      "09:35:54 [DEBUG] evaluate generation 1056: reward = -87.63, steps = 114\n",
      "09:35:56 [DEBUG] evaluate generation 1057: reward = -90.13, steps = 95\n",
      "09:35:57 [DEBUG] evaluate generation 1058: reward = -88.71, steps = 112\n",
      "09:35:58 [DEBUG] evaluate generation 1059: reward = -86.77, steps = 124\n",
      "09:36:01 [DEBUG] evaluate generation 1060: reward = -90.35, steps = 96\n",
      "09:36:02 [DEBUG] evaluate generation 1061: reward = -86.46, steps = 140\n",
      "09:36:04 [DEBUG] evaluate generation 1062: reward = -89.99, steps = 100\n",
      "09:36:06 [DEBUG] evaluate generation 1063: reward = -86.25, steps = 130\n",
      "09:36:08 [DEBUG] evaluate generation 1064: reward = -86.75, steps = 131\n",
      "09:36:09 [DEBUG] evaluate generation 1065: reward = -90.97, steps = 97\n",
      "09:36:11 [DEBUG] evaluate generation 1066: reward = -86.64, steps = 129\n",
      "09:36:12 [DEBUG] evaluate generation 1067: reward = -86.42, steps = 149\n",
      "09:36:17 [DEBUG] evaluate generation 1068: reward = -90.45, steps = 94\n",
      "09:36:19 [DEBUG] evaluate generation 1069: reward = -89.04, steps = 120\n",
      "09:36:20 [DEBUG] evaluate generation 1070: reward = -87.57, steps = 118\n",
      "09:36:21 [DEBUG] evaluate generation 1071: reward = -86.32, steps = 142\n",
      "09:36:24 [DEBUG] evaluate generation 1072: reward = -90.42, steps = 97\n",
      "09:36:25 [DEBUG] evaluate generation 1073: reward = -86.05, steps = 131\n",
      "09:36:26 [DEBUG] evaluate generation 1074: reward = -90.70, steps = 95\n",
      "09:36:29 [DEBUG] evaluate generation 1075: reward = -87.71, steps = 111\n",
      "09:36:30 [DEBUG] evaluate generation 1076: reward = -86.59, steps = 121\n",
      "09:36:32 [DEBUG] evaluate generation 1077: reward = -86.98, steps = 127\n",
      "09:36:33 [DEBUG] evaluate generation 1078: reward = -86.47, steps = 131\n",
      "09:36:35 [DEBUG] evaluate generation 1079: reward = -86.89, steps = 124\n",
      "09:36:39 [DEBUG] evaluate generation 1080: reward = -87.95, steps = 116\n",
      "09:36:40 [DEBUG] evaluate generation 1081: reward = -86.48, steps = 129\n",
      "09:36:42 [DEBUG] evaluate generation 1082: reward = -86.89, steps = 124\n",
      "09:36:44 [DEBUG] evaluate generation 1083: reward = -89.73, steps = 102\n",
      "09:36:46 [DEBUG] evaluate generation 1084: reward = -86.97, steps = 121\n",
      "09:36:48 [DEBUG] evaluate generation 1085: reward = -87.66, steps = 113\n",
      "09:36:50 [DEBUG] evaluate generation 1086: reward = -90.54, steps = 95\n",
      "09:36:51 [DEBUG] evaluate generation 1087: reward = -91.15, steps = 96\n",
      "09:36:53 [DEBUG] evaluate generation 1088: reward = -90.22, steps = 95\n",
      "09:36:56 [DEBUG] evaluate generation 1089: reward = -89.98, steps = 97\n",
      "09:36:57 [DEBUG] evaluate generation 1090: reward = -89.69, steps = 100\n",
      "09:37:01 [DEBUG] evaluate generation 1091: reward = -90.38, steps = 100\n",
      "09:37:02 [DEBUG] evaluate generation 1092: reward = -86.72, steps = 121\n",
      "09:37:05 [DEBUG] evaluate generation 1093: reward = -86.56, steps = 123\n",
      "09:37:07 [DEBUG] evaluate generation 1094: reward = -90.88, steps = 101\n",
      "09:37:09 [DEBUG] evaluate generation 1095: reward = -88.76, steps = 112\n",
      "09:37:10 [DEBUG] evaluate generation 1096: reward = -90.75, steps = 100\n",
      "09:37:13 [DEBUG] evaluate generation 1097: reward = -88.89, steps = 112\n",
      "09:37:15 [DEBUG] evaluate generation 1098: reward = -87.17, steps = 124\n",
      "09:37:16 [DEBUG] evaluate generation 1099: reward = -90.00, steps = 101\n",
      "09:37:17 [DEBUG] evaluate generation 1100: reward = -87.80, steps = 123\n",
      "09:37:19 [DEBUG] evaluate generation 1101: reward = -87.21, steps = 125\n",
      "09:37:20 [DEBUG] evaluate generation 1102: reward = -87.55, steps = 117\n",
      "09:37:22 [DEBUG] evaluate generation 1103: reward = -87.61, steps = 120\n",
      "09:37:24 [DEBUG] evaluate generation 1104: reward = -88.33, steps = 109\n",
      "09:37:25 [DEBUG] evaluate generation 1105: reward = -86.41, steps = 134\n",
      "09:37:27 [DEBUG] evaluate generation 1106: reward = -86.50, steps = 120\n",
      "09:37:29 [DEBUG] evaluate generation 1107: reward = -87.00, steps = 119\n",
      "09:37:30 [DEBUG] evaluate generation 1108: reward = -87.95, steps = 115\n",
      "09:37:32 [DEBUG] evaluate generation 1109: reward = -87.13, steps = 117\n",
      "09:37:33 [DEBUG] evaluate generation 1110: reward = -87.20, steps = 120\n",
      "09:37:36 [DEBUG] evaluate generation 1111: reward = -89.71, steps = 100\n",
      "09:37:37 [DEBUG] evaluate generation 1112: reward = -87.45, steps = 114\n",
      "09:37:38 [DEBUG] evaluate generation 1113: reward = -86.69, steps = 128\n",
      "09:37:39 [DEBUG] evaluate generation 1114: reward = -86.86, steps = 129\n",
      "09:37:40 [DEBUG] evaluate generation 1115: reward = -86.74, steps = 127\n",
      "09:37:43 [DEBUG] evaluate generation 1116: reward = -86.88, steps = 127\n",
      "09:37:45 [DEBUG] evaluate generation 1117: reward = -86.13, steps = 132\n",
      "09:37:46 [DEBUG] evaluate generation 1118: reward = -87.25, steps = 130\n",
      "09:37:47 [DEBUG] evaluate generation 1119: reward = -86.45, steps = 142\n",
      "09:37:49 [DEBUG] evaluate generation 1120: reward = -86.75, steps = 130\n",
      "09:37:50 [DEBUG] evaluate generation 1121: reward = -86.94, steps = 122\n",
      "09:37:53 [DEBUG] evaluate generation 1122: reward = -86.63, steps = 128\n",
      "09:37:56 [DEBUG] evaluate generation 1123: reward = -87.43, steps = 124\n",
      "09:37:57 [DEBUG] evaluate generation 1124: reward = -86.44, steps = 128\n",
      "09:37:59 [DEBUG] evaluate generation 1125: reward = -86.70, steps = 128\n",
      "09:38:00 [DEBUG] evaluate generation 1126: reward = -86.71, steps = 134\n",
      "09:38:01 [DEBUG] evaluate generation 1127: reward = -86.42, steps = 134\n",
      "09:38:05 [DEBUG] evaluate generation 1128: reward = -86.57, steps = 135\n",
      "09:38:07 [DEBUG] evaluate generation 1129: reward = -86.40, steps = 126\n",
      "09:38:09 [DEBUG] evaluate generation 1130: reward = -87.73, steps = 116\n",
      "09:38:12 [DEBUG] evaluate generation 1131: reward = -87.34, steps = 117\n",
      "09:38:13 [DEBUG] evaluate generation 1132: reward = -87.01, steps = 126\n",
      "09:38:14 [DEBUG] evaluate generation 1133: reward = -86.54, steps = 136\n",
      "09:38:15 [DEBUG] evaluate generation 1134: reward = -85.89, steps = 141\n",
      "09:38:18 [DEBUG] evaluate generation 1135: reward = -86.63, steps = 134\n",
      "09:38:19 [DEBUG] evaluate generation 1136: reward = -85.81, steps = 144\n",
      "09:38:21 [DEBUG] evaluate generation 1137: reward = -86.41, steps = 133\n",
      "09:38:23 [DEBUG] evaluate generation 1138: reward = -86.68, steps = 148\n",
      "09:38:26 [DEBUG] evaluate generation 1139: reward = -86.50, steps = 141\n",
      "09:38:29 [DEBUG] evaluate generation 1140: reward = -86.61, steps = 136\n",
      "09:38:30 [DEBUG] evaluate generation 1141: reward = -86.18, steps = 154\n",
      "09:38:32 [DEBUG] evaluate generation 1142: reward = -86.12, steps = 137\n",
      "09:38:33 [DEBUG] evaluate generation 1143: reward = -86.33, steps = 141\n",
      "09:38:35 [DEBUG] evaluate generation 1144: reward = -86.33, steps = 159\n",
      "09:38:37 [DEBUG] evaluate generation 1145: reward = -85.87, steps = 154\n",
      "09:38:39 [DEBUG] evaluate generation 1146: reward = -85.89, steps = 157\n",
      "09:38:41 [DEBUG] evaluate generation 1147: reward = -85.95, steps = 148\n",
      "09:38:42 [DEBUG] evaluate generation 1148: reward = -86.21, steps = 158\n",
      "09:38:44 [DEBUG] evaluate generation 1149: reward = -85.95, steps = 153\n",
      "09:38:47 [DEBUG] evaluate generation 1150: reward = -86.23, steps = 146\n",
      "09:38:49 [DEBUG] evaluate generation 1151: reward = -87.03, steps = 127\n",
      "09:38:51 [DEBUG] evaluate generation 1152: reward = -86.66, steps = 130\n",
      "09:38:53 [DEBUG] evaluate generation 1153: reward = -87.35, steps = 132\n",
      "09:38:54 [DEBUG] evaluate generation 1154: reward = -86.35, steps = 148\n",
      "09:38:57 [DEBUG] evaluate generation 1155: reward = -86.87, steps = 134\n",
      "09:39:00 [DEBUG] evaluate generation 1156: reward = -87.23, steps = 126\n",
      "09:39:01 [DEBUG] evaluate generation 1157: reward = -87.37, steps = 126\n",
      "09:39:02 [DEBUG] evaluate generation 1158: reward = -86.75, steps = 140\n",
      "09:39:04 [DEBUG] evaluate generation 1159: reward = -86.19, steps = 145\n",
      "09:39:07 [DEBUG] evaluate generation 1160: reward = -86.97, steps = 128\n",
      "09:39:09 [DEBUG] evaluate generation 1161: reward = -86.30, steps = 136\n",
      "09:39:10 [DEBUG] evaluate generation 1162: reward = -86.12, steps = 157\n",
      "09:39:12 [DEBUG] evaluate generation 1163: reward = -86.32, steps = 154\n",
      "09:39:14 [DEBUG] evaluate generation 1164: reward = -87.20, steps = 129\n",
      "09:39:17 [DEBUG] evaluate generation 1165: reward = -87.32, steps = 118\n",
      "09:39:19 [DEBUG] evaluate generation 1166: reward = -86.93, steps = 121\n",
      "09:39:21 [DEBUG] evaluate generation 1167: reward = -87.97, steps = 114\n",
      "09:39:23 [DEBUG] evaluate generation 1168: reward = -89.26, steps = 102\n",
      "09:39:24 [DEBUG] evaluate generation 1169: reward = -87.92, steps = 112\n",
      "09:39:25 [DEBUG] evaluate generation 1170: reward = -88.03, steps = 116\n",
      "09:39:28 [DEBUG] evaluate generation 1171: reward = -87.42, steps = 119\n",
      "09:39:29 [DEBUG] evaluate generation 1172: reward = -86.72, steps = 134\n",
      "09:39:31 [DEBUG] evaluate generation 1173: reward = -87.20, steps = 120\n",
      "09:39:32 [DEBUG] evaluate generation 1174: reward = -86.79, steps = 141\n",
      "09:39:34 [DEBUG] evaluate generation 1175: reward = -86.96, steps = 128\n",
      "09:39:36 [DEBUG] evaluate generation 1176: reward = -88.21, steps = 123\n",
      "09:39:37 [DEBUG] evaluate generation 1177: reward = -86.87, steps = 134\n",
      "09:39:39 [DEBUG] evaluate generation 1178: reward = -86.71, steps = 133\n",
      "09:39:40 [DEBUG] evaluate generation 1179: reward = -86.26, steps = 149\n",
      "09:39:41 [DEBUG] evaluate generation 1180: reward = -86.98, steps = 145\n",
      "09:39:42 [DEBUG] evaluate generation 1181: reward = -87.11, steps = 123\n",
      "09:39:44 [DEBUG] evaluate generation 1182: reward = -86.38, steps = 140\n",
      "09:39:46 [DEBUG] evaluate generation 1183: reward = -86.22, steps = 136\n",
      "09:39:48 [DEBUG] evaluate generation 1184: reward = -86.59, steps = 136\n",
      "09:39:49 [DEBUG] evaluate generation 1185: reward = -86.17, steps = 129\n",
      "09:39:50 [DEBUG] evaluate generation 1186: reward = -85.91, steps = 154\n",
      "09:39:52 [DEBUG] evaluate generation 1187: reward = -86.23, steps = 158\n",
      "09:39:54 [DEBUG] evaluate generation 1188: reward = -86.63, steps = 134\n",
      "09:39:56 [DEBUG] evaluate generation 1189: reward = -86.13, steps = 148\n",
      "09:39:58 [DEBUG] evaluate generation 1190: reward = -86.47, steps = 135\n",
      "09:40:00 [DEBUG] evaluate generation 1191: reward = -86.33, steps = 139\n",
      "09:40:02 [DEBUG] evaluate generation 1192: reward = -86.30, steps = 139\n",
      "09:40:03 [DEBUG] evaluate generation 1193: reward = -86.77, steps = 154\n",
      "09:40:04 [DEBUG] evaluate generation 1194: reward = -86.21, steps = 134\n",
      "09:40:05 [DEBUG] evaluate generation 1195: reward = -86.13, steps = 138\n",
      "09:40:07 [DEBUG] evaluate generation 1196: reward = -86.28, steps = 136\n",
      "09:40:09 [DEBUG] evaluate generation 1197: reward = -86.16, steps = 145\n",
      "09:40:10 [DEBUG] evaluate generation 1198: reward = -86.44, steps = 131\n",
      "09:40:11 [DEBUG] evaluate generation 1199: reward = -86.76, steps = 151\n",
      "09:40:13 [DEBUG] evaluate generation 1200: reward = -86.66, steps = 158\n",
      "09:40:15 [DEBUG] evaluate generation 1201: reward = -86.37, steps = 149\n",
      "09:40:18 [DEBUG] evaluate generation 1202: reward = -86.05, steps = 143\n",
      "09:40:20 [DEBUG] evaluate generation 1203: reward = -86.11, steps = 148\n",
      "09:40:22 [DEBUG] evaluate generation 1204: reward = -86.61, steps = 132\n",
      "09:40:25 [DEBUG] evaluate generation 1205: reward = -86.96, steps = 152\n",
      "09:40:27 [DEBUG] evaluate generation 1206: reward = -86.61, steps = 145\n",
      "09:40:28 [DEBUG] evaluate generation 1207: reward = -86.13, steps = 149\n",
      "09:40:30 [DEBUG] evaluate generation 1208: reward = -86.42, steps = 148\n",
      "09:40:33 [DEBUG] evaluate generation 1209: reward = -86.34, steps = 142\n",
      "09:40:34 [DEBUG] evaluate generation 1210: reward = -86.70, steps = 145\n",
      "09:40:37 [DEBUG] evaluate generation 1211: reward = -86.46, steps = 145\n",
      "09:40:40 [DEBUG] evaluate generation 1212: reward = -88.03, steps = 147\n",
      "09:40:43 [DEBUG] evaluate generation 1213: reward = -87.13, steps = 138\n",
      "09:40:44 [DEBUG] evaluate generation 1214: reward = -86.88, steps = 140\n",
      "09:40:46 [DEBUG] evaluate generation 1215: reward = -86.91, steps = 144\n",
      "09:40:47 [DEBUG] evaluate generation 1216: reward = -87.22, steps = 128\n",
      "09:40:49 [DEBUG] evaluate generation 1217: reward = -86.71, steps = 138\n",
      "09:40:50 [DEBUG] evaluate generation 1218: reward = -86.22, steps = 138\n",
      "09:40:52 [DEBUG] evaluate generation 1219: reward = -86.97, steps = 142\n",
      "09:40:54 [DEBUG] evaluate generation 1220: reward = -87.04, steps = 160\n",
      "09:40:55 [DEBUG] evaluate generation 1221: reward = -86.86, steps = 145\n",
      "09:40:56 [DEBUG] evaluate generation 1222: reward = -87.04, steps = 158\n",
      "09:40:59 [DEBUG] evaluate generation 1223: reward = -86.77, steps = 158\n",
      "09:41:00 [DEBUG] evaluate generation 1224: reward = -87.09, steps = 157\n",
      "09:41:03 [DEBUG] evaluate generation 1225: reward = -86.83, steps = 149\n",
      "09:41:05 [DEBUG] evaluate generation 1226: reward = -86.51, steps = 155\n",
      "09:41:08 [DEBUG] evaluate generation 1227: reward = -86.57, steps = 153\n",
      "09:41:09 [DEBUG] evaluate generation 1228: reward = -86.53, steps = 152\n",
      "09:41:12 [DEBUG] evaluate generation 1229: reward = -86.79, steps = 155\n",
      "09:41:13 [DEBUG] evaluate generation 1230: reward = -86.72, steps = 153\n",
      "09:41:16 [DEBUG] evaluate generation 1231: reward = -86.94, steps = 137\n",
      "09:41:20 [DEBUG] evaluate generation 1232: reward = -86.79, steps = 152\n",
      "09:41:21 [DEBUG] evaluate generation 1233: reward = -86.59, steps = 146\n",
      "09:41:23 [DEBUG] evaluate generation 1234: reward = -86.79, steps = 147\n",
      "09:41:26 [DEBUG] evaluate generation 1235: reward = -88.07, steps = 140\n",
      "09:41:28 [DEBUG] evaluate generation 1236: reward = -86.38, steps = 145\n",
      "09:41:29 [DEBUG] evaluate generation 1237: reward = -86.79, steps = 151\n",
      "09:41:31 [DEBUG] evaluate generation 1238: reward = -86.43, steps = 150\n",
      "09:41:32 [DEBUG] evaluate generation 1239: reward = -86.80, steps = 155\n",
      "09:41:35 [DEBUG] evaluate generation 1240: reward = -85.75, steps = 149\n",
      "09:41:37 [DEBUG] evaluate generation 1241: reward = -87.13, steps = 165\n",
      "09:41:39 [DEBUG] evaluate generation 1242: reward = -87.25, steps = 143\n",
      "09:41:41 [DEBUG] evaluate generation 1243: reward = -85.95, steps = 141\n",
      "09:41:43 [DEBUG] evaluate generation 1244: reward = -86.18, steps = 147\n",
      "09:41:44 [DEBUG] evaluate generation 1245: reward = -87.07, steps = 151\n",
      "09:41:46 [DEBUG] evaluate generation 1246: reward = -86.14, steps = 146\n",
      "09:41:47 [DEBUG] evaluate generation 1247: reward = -86.76, steps = 155\n",
      "09:41:49 [DEBUG] evaluate generation 1248: reward = -86.32, steps = 146\n",
      "09:41:52 [DEBUG] evaluate generation 1249: reward = -86.55, steps = 133\n",
      "09:41:55 [DEBUG] evaluate generation 1250: reward = -86.45, steps = 139\n",
      "09:41:56 [DEBUG] evaluate generation 1251: reward = -86.29, steps = 142\n",
      "09:41:57 [DEBUG] evaluate generation 1252: reward = -87.68, steps = 133\n",
      "09:42:00 [DEBUG] evaluate generation 1253: reward = -86.45, steps = 140\n",
      "09:42:01 [DEBUG] evaluate generation 1254: reward = -86.26, steps = 148\n",
      "09:42:03 [DEBUG] evaluate generation 1255: reward = -86.61, steps = 148\n",
      "09:42:04 [DEBUG] evaluate generation 1256: reward = -86.21, steps = 153\n",
      "09:42:07 [DEBUG] evaluate generation 1257: reward = -86.68, steps = 143\n",
      "09:42:08 [DEBUG] evaluate generation 1258: reward = -86.44, steps = 152\n",
      "09:42:09 [DEBUG] evaluate generation 1259: reward = -86.60, steps = 143\n",
      "09:42:11 [DEBUG] evaluate generation 1260: reward = -86.51, steps = 145\n",
      "09:42:13 [DEBUG] evaluate generation 1261: reward = -86.72, steps = 142\n",
      "09:42:14 [DEBUG] evaluate generation 1262: reward = -86.30, steps = 152\n",
      "09:42:16 [DEBUG] evaluate generation 1263: reward = -86.36, steps = 155\n",
      "09:42:18 [DEBUG] evaluate generation 1264: reward = -87.10, steps = 134\n",
      "09:42:19 [DEBUG] evaluate generation 1265: reward = -86.57, steps = 160\n",
      "09:42:21 [DEBUG] evaluate generation 1266: reward = -86.56, steps = 152\n",
      "09:42:23 [DEBUG] evaluate generation 1267: reward = -86.63, steps = 149\n",
      "09:42:26 [DEBUG] evaluate generation 1268: reward = -86.82, steps = 145\n",
      "09:42:29 [DEBUG] evaluate generation 1269: reward = -86.94, steps = 137\n",
      "09:42:31 [DEBUG] evaluate generation 1270: reward = -86.53, steps = 153\n",
      "09:42:33 [DEBUG] evaluate generation 1271: reward = -86.32, steps = 133\n",
      "09:42:34 [DEBUG] evaluate generation 1272: reward = -86.52, steps = 137\n",
      "09:42:36 [DEBUG] evaluate generation 1273: reward = -87.09, steps = 157\n",
      "09:42:37 [DEBUG] evaluate generation 1274: reward = -86.44, steps = 145\n",
      "09:42:39 [DEBUG] evaluate generation 1275: reward = -86.60, steps = 140\n",
      "09:42:41 [DEBUG] evaluate generation 1276: reward = -86.69, steps = 161\n",
      "09:42:43 [DEBUG] evaluate generation 1277: reward = -86.83, steps = 155\n",
      "09:42:45 [DEBUG] evaluate generation 1278: reward = -86.33, steps = 157\n",
      "09:42:47 [DEBUG] evaluate generation 1279: reward = -86.29, steps = 150\n",
      "09:42:49 [DEBUG] evaluate generation 1280: reward = -86.13, steps = 141\n",
      "09:42:51 [DEBUG] evaluate generation 1281: reward = -86.63, steps = 133\n",
      "09:42:54 [DEBUG] evaluate generation 1282: reward = -86.95, steps = 147\n",
      "09:42:55 [DEBUG] evaluate generation 1283: reward = -87.98, steps = 126\n",
      "09:42:57 [DEBUG] evaluate generation 1284: reward = -86.94, steps = 138\n",
      "09:42:58 [DEBUG] evaluate generation 1285: reward = -86.61, steps = 137\n",
      "09:43:01 [DEBUG] evaluate generation 1286: reward = -86.72, steps = 148\n",
      "09:43:02 [DEBUG] evaluate generation 1287: reward = -86.45, steps = 155\n",
      "09:43:06 [DEBUG] evaluate generation 1288: reward = -86.83, steps = 144\n",
      "09:43:07 [DEBUG] evaluate generation 1289: reward = -86.68, steps = 146\n",
      "09:43:09 [DEBUG] evaluate generation 1290: reward = -86.63, steps = 145\n",
      "09:43:11 [DEBUG] evaluate generation 1291: reward = -86.75, steps = 160\n",
      "09:43:13 [DEBUG] evaluate generation 1292: reward = -87.40, steps = 155\n",
      "09:43:14 [DEBUG] evaluate generation 1293: reward = -86.50, steps = 143\n",
      "09:43:16 [DEBUG] evaluate generation 1294: reward = -86.76, steps = 148\n",
      "09:43:17 [DEBUG] evaluate generation 1295: reward = -86.84, steps = 155\n",
      "09:43:19 [DEBUG] evaluate generation 1296: reward = -87.13, steps = 169\n",
      "09:43:22 [DEBUG] evaluate generation 1297: reward = -86.88, steps = 145\n",
      "09:43:23 [DEBUG] evaluate generation 1298: reward = -86.26, steps = 147\n",
      "09:43:28 [DEBUG] evaluate generation 1299: reward = -86.37, steps = 149\n",
      "09:43:29 [DEBUG] evaluate generation 1300: reward = -86.42, steps = 147\n",
      "09:43:31 [DEBUG] evaluate generation 1301: reward = -86.67, steps = 151\n",
      "09:43:35 [DEBUG] evaluate generation 1302: reward = -87.17, steps = 151\n",
      "09:43:37 [DEBUG] evaluate generation 1303: reward = -86.60, steps = 144\n",
      "09:43:39 [DEBUG] evaluate generation 1304: reward = -87.18, steps = 136\n",
      "09:43:40 [DEBUG] evaluate generation 1305: reward = -86.63, steps = 144\n",
      "09:43:42 [DEBUG] evaluate generation 1306: reward = -86.79, steps = 142\n",
      "09:43:44 [DEBUG] evaluate generation 1307: reward = -86.78, steps = 137\n",
      "09:43:47 [DEBUG] evaluate generation 1308: reward = -88.16, steps = 138\n",
      "09:43:49 [DEBUG] evaluate generation 1309: reward = -87.77, steps = 141\n",
      "09:43:51 [DEBUG] evaluate generation 1310: reward = -87.26, steps = 141\n",
      "09:43:53 [DEBUG] evaluate generation 1311: reward = -87.31, steps = 152\n",
      "09:43:54 [DEBUG] evaluate generation 1312: reward = -86.59, steps = 141\n",
      "09:43:55 [DEBUG] evaluate generation 1313: reward = -86.37, steps = 154\n",
      "09:43:58 [DEBUG] evaluate generation 1314: reward = -86.16, steps = 155\n",
      "09:44:00 [DEBUG] evaluate generation 1315: reward = -86.05, steps = 151\n",
      "09:44:01 [DEBUG] evaluate generation 1316: reward = -86.21, steps = 153\n",
      "09:44:04 [DEBUG] evaluate generation 1317: reward = -86.23, steps = 146\n",
      "09:44:07 [DEBUG] evaluate generation 1318: reward = -86.62, steps = 139\n",
      "09:44:09 [DEBUG] evaluate generation 1319: reward = -86.17, steps = 142\n",
      "09:44:10 [DEBUG] evaluate generation 1320: reward = -86.40, steps = 148\n",
      "09:44:12 [DEBUG] evaluate generation 1321: reward = -87.05, steps = 150\n",
      "09:44:13 [DEBUG] evaluate generation 1322: reward = -86.35, steps = 162\n",
      "09:44:15 [DEBUG] evaluate generation 1323: reward = -86.15, steps = 150\n",
      "09:44:16 [DEBUG] evaluate generation 1324: reward = -86.82, steps = 156\n",
      "09:44:19 [DEBUG] evaluate generation 1325: reward = -86.37, steps = 162\n",
      "09:44:21 [DEBUG] evaluate generation 1326: reward = -86.89, steps = 149\n",
      "09:44:22 [DEBUG] evaluate generation 1327: reward = -86.50, steps = 151\n",
      "09:44:23 [DEBUG] evaluate generation 1328: reward = -86.40, steps = 170\n",
      "09:44:26 [DEBUG] evaluate generation 1329: reward = -85.94, steps = 146\n",
      "09:44:27 [DEBUG] evaluate generation 1330: reward = -86.31, steps = 147\n",
      "09:44:28 [DEBUG] evaluate generation 1331: reward = -86.29, steps = 176\n",
      "09:44:30 [DEBUG] evaluate generation 1332: reward = -86.20, steps = 149\n",
      "09:44:31 [DEBUG] evaluate generation 1333: reward = -85.97, steps = 151\n",
      "09:44:33 [DEBUG] evaluate generation 1334: reward = -86.16, steps = 165\n",
      "09:44:35 [DEBUG] evaluate generation 1335: reward = -86.19, steps = 162\n",
      "09:44:36 [DEBUG] evaluate generation 1336: reward = -86.41, steps = 154\n",
      "09:44:40 [DEBUG] evaluate generation 1337: reward = -85.79, steps = 158\n",
      "09:44:41 [DEBUG] evaluate generation 1338: reward = -85.94, steps = 156\n",
      "09:44:42 [DEBUG] evaluate generation 1339: reward = -86.15, steps = 149\n",
      "09:44:44 [DEBUG] evaluate generation 1340: reward = -86.23, steps = 160\n",
      "09:44:46 [DEBUG] evaluate generation 1341: reward = -86.35, steps = 150\n",
      "09:44:47 [DEBUG] evaluate generation 1342: reward = -86.15, steps = 150\n",
      "09:44:50 [DEBUG] evaluate generation 1343: reward = -86.18, steps = 171\n",
      "09:44:52 [DEBUG] evaluate generation 1344: reward = -86.13, steps = 158\n",
      "09:44:54 [DEBUG] evaluate generation 1345: reward = -86.23, steps = 154\n",
      "09:44:55 [DEBUG] evaluate generation 1346: reward = -86.05, steps = 149\n",
      "09:44:58 [DEBUG] evaluate generation 1347: reward = -86.12, steps = 150\n",
      "09:45:01 [DEBUG] evaluate generation 1348: reward = -86.30, steps = 156\n",
      "09:45:03 [DEBUG] evaluate generation 1349: reward = -86.90, steps = 148\n",
      "09:45:04 [DEBUG] evaluate generation 1350: reward = -86.29, steps = 150\n",
      "09:45:06 [DEBUG] evaluate generation 1351: reward = -86.47, steps = 158\n",
      "09:45:07 [DEBUG] evaluate generation 1352: reward = -86.38, steps = 153\n",
      "09:45:10 [DEBUG] evaluate generation 1353: reward = -86.36, steps = 152\n",
      "09:45:12 [DEBUG] evaluate generation 1354: reward = -85.81, steps = 150\n",
      "09:45:15 [DEBUG] evaluate generation 1355: reward = -86.19, steps = 148\n",
      "09:45:16 [DEBUG] evaluate generation 1356: reward = -86.24, steps = 145\n",
      "09:45:18 [DEBUG] evaluate generation 1357: reward = -85.99, steps = 144\n",
      "09:45:20 [DEBUG] evaluate generation 1358: reward = -86.17, steps = 140\n",
      "09:45:23 [DEBUG] evaluate generation 1359: reward = -86.36, steps = 141\n",
      "09:45:25 [DEBUG] evaluate generation 1360: reward = -86.47, steps = 143\n",
      "09:45:27 [DEBUG] evaluate generation 1361: reward = -86.08, steps = 147\n",
      "09:45:29 [DEBUG] evaluate generation 1362: reward = -86.26, steps = 146\n",
      "09:45:31 [DEBUG] evaluate generation 1363: reward = -86.14, steps = 159\n",
      "09:45:32 [DEBUG] evaluate generation 1364: reward = -86.15, steps = 151\n",
      "09:45:34 [DEBUG] evaluate generation 1365: reward = -86.55, steps = 139\n",
      "09:45:35 [DEBUG] evaluate generation 1366: reward = -86.35, steps = 142\n",
      "09:45:37 [DEBUG] evaluate generation 1367: reward = -86.43, steps = 152\n",
      "09:45:39 [DEBUG] evaluate generation 1368: reward = -85.81, steps = 146\n",
      "09:45:41 [DEBUG] evaluate generation 1369: reward = -85.72, steps = 159\n",
      "09:45:43 [DEBUG] evaluate generation 1370: reward = -86.08, steps = 158\n",
      "09:45:45 [DEBUG] evaluate generation 1371: reward = -85.94, steps = 160\n",
      "09:45:46 [DEBUG] evaluate generation 1372: reward = -86.44, steps = 167\n",
      "09:45:49 [DEBUG] evaluate generation 1373: reward = -85.90, steps = 149\n",
      "09:45:51 [DEBUG] evaluate generation 1374: reward = -86.11, steps = 162\n",
      "09:45:54 [DEBUG] evaluate generation 1375: reward = -86.31, steps = 149\n",
      "09:45:56 [DEBUG] evaluate generation 1376: reward = -87.66, steps = 149\n",
      "09:45:58 [DEBUG] evaluate generation 1377: reward = -86.46, steps = 144\n",
      "09:46:02 [DEBUG] evaluate generation 1378: reward = -86.16, steps = 147\n",
      "09:46:03 [DEBUG] evaluate generation 1379: reward = -86.06, steps = 157\n",
      "09:46:07 [DEBUG] evaluate generation 1380: reward = -86.33, steps = 150\n",
      "09:46:09 [DEBUG] evaluate generation 1381: reward = -85.85, steps = 158\n",
      "09:46:11 [DEBUG] evaluate generation 1382: reward = -86.34, steps = 150\n",
      "09:46:13 [DEBUG] evaluate generation 1383: reward = -86.41, steps = 146\n",
      "09:46:14 [DEBUG] evaluate generation 1384: reward = -85.79, steps = 150\n",
      "09:46:17 [DEBUG] evaluate generation 1385: reward = -88.13, steps = 147\n",
      "09:46:19 [DEBUG] evaluate generation 1386: reward = -85.77, steps = 149\n",
      "09:46:20 [DEBUG] evaluate generation 1387: reward = -86.41, steps = 157\n",
      "09:46:22 [DEBUG] evaluate generation 1388: reward = -85.82, steps = 168\n",
      "09:46:23 [DEBUG] evaluate generation 1389: reward = -86.15, steps = 153\n",
      "09:46:25 [DEBUG] evaluate generation 1390: reward = -85.83, steps = 151\n",
      "09:46:29 [DEBUG] evaluate generation 1391: reward = -86.51, steps = 146\n",
      "09:46:30 [DEBUG] evaluate generation 1392: reward = -85.65, steps = 153\n",
      "09:46:31 [DEBUG] evaluate generation 1393: reward = -85.86, steps = 157\n",
      "09:46:34 [DEBUG] evaluate generation 1394: reward = -85.76, steps = 144\n",
      "09:46:37 [DEBUG] evaluate generation 1395: reward = -86.32, steps = 148\n",
      "09:46:39 [DEBUG] evaluate generation 1396: reward = -86.46, steps = 146\n",
      "09:46:41 [DEBUG] evaluate generation 1397: reward = -85.93, steps = 142\n",
      "09:46:42 [DEBUG] evaluate generation 1398: reward = -86.36, steps = 149\n",
      "09:46:44 [DEBUG] evaluate generation 1399: reward = -86.10, steps = 149\n",
      "09:46:45 [DEBUG] evaluate generation 1400: reward = -86.05, steps = 151\n",
      "09:46:47 [DEBUG] evaluate generation 1401: reward = -85.69, steps = 148\n",
      "09:46:48 [DEBUG] evaluate generation 1402: reward = -88.54, steps = 144\n",
      "09:46:50 [DEBUG] evaluate generation 1403: reward = -85.68, steps = 150\n",
      "09:46:52 [DEBUG] evaluate generation 1404: reward = -87.09, steps = 142\n",
      "09:46:53 [DEBUG] evaluate generation 1405: reward = -85.72, steps = 147\n",
      "09:46:56 [DEBUG] evaluate generation 1406: reward = -85.96, steps = 149\n",
      "09:46:57 [DEBUG] evaluate generation 1407: reward = -86.09, steps = 159\n",
      "09:47:00 [DEBUG] evaluate generation 1408: reward = -86.36, steps = 144\n",
      "09:47:02 [DEBUG] evaluate generation 1409: reward = -88.38, steps = 148\n",
      "09:47:03 [DEBUG] evaluate generation 1410: reward = -85.76, steps = 152\n",
      "09:47:04 [DEBUG] evaluate generation 1411: reward = -85.73, steps = 163\n",
      "09:47:07 [DEBUG] evaluate generation 1412: reward = -85.62, steps = 151\n",
      "09:47:10 [DEBUG] evaluate generation 1413: reward = -85.60, steps = 154\n",
      "09:47:12 [DEBUG] evaluate generation 1414: reward = -86.35, steps = 149\n",
      "09:47:13 [DEBUG] evaluate generation 1415: reward = -85.20, steps = 152\n",
      "09:47:17 [DEBUG] evaluate generation 1416: reward = -85.58, steps = 149\n",
      "09:47:19 [DEBUG] evaluate generation 1417: reward = -85.62, steps = 148\n",
      "09:47:21 [DEBUG] evaluate generation 1418: reward = -85.49, steps = 165\n",
      "09:47:22 [DEBUG] evaluate generation 1419: reward = -85.11, steps = 154\n",
      "09:47:25 [DEBUG] evaluate generation 1420: reward = -85.16, steps = 150\n",
      "09:47:26 [DEBUG] evaluate generation 1421: reward = -85.44, steps = 148\n",
      "09:47:29 [DEBUG] evaluate generation 1422: reward = -85.17, steps = 144\n",
      "09:47:31 [DEBUG] evaluate generation 1423: reward = -85.06, steps = 158\n",
      "09:47:33 [DEBUG] evaluate generation 1424: reward = -85.50, steps = 147\n",
      "09:47:37 [DEBUG] evaluate generation 1425: reward = -86.39, steps = 143\n",
      "09:47:38 [DEBUG] evaluate generation 1426: reward = -85.78, steps = 157\n",
      "09:47:39 [DEBUG] evaluate generation 1427: reward = -86.91, steps = 148\n",
      "09:47:41 [DEBUG] evaluate generation 1428: reward = -86.83, steps = 137\n",
      "09:47:43 [DEBUG] evaluate generation 1429: reward = -84.97, steps = 139\n",
      "09:47:46 [DEBUG] evaluate generation 1430: reward = -85.67, steps = 152\n",
      "09:47:48 [DEBUG] evaluate generation 1431: reward = -85.28, steps = 144\n",
      "09:47:49 [DEBUG] evaluate generation 1432: reward = -86.18, steps = 145\n",
      "09:47:50 [DEBUG] evaluate generation 1433: reward = -85.80, steps = 148\n",
      "09:47:52 [DEBUG] evaluate generation 1434: reward = -85.50, steps = 154\n",
      "09:47:54 [DEBUG] evaluate generation 1435: reward = -85.43, steps = 151\n",
      "09:47:56 [DEBUG] evaluate generation 1436: reward = -85.39, steps = 154\n",
      "09:47:57 [DEBUG] evaluate generation 1437: reward = -85.51, steps = 153\n",
      "09:47:59 [DEBUG] evaluate generation 1438: reward = -85.27, steps = 158\n",
      "09:48:01 [DEBUG] evaluate generation 1439: reward = -85.42, steps = 153\n",
      "09:48:03 [DEBUG] evaluate generation 1440: reward = -85.12, steps = 145\n",
      "09:48:05 [DEBUG] evaluate generation 1441: reward = -85.88, steps = 147\n",
      "09:48:08 [DEBUG] evaluate generation 1442: reward = -86.33, steps = 145\n",
      "09:48:09 [DEBUG] evaluate generation 1443: reward = -85.31, steps = 155\n",
      "09:48:10 [DEBUG] evaluate generation 1444: reward = -85.86, steps = 143\n",
      "09:48:12 [DEBUG] evaluate generation 1445: reward = -86.04, steps = 153\n",
      "09:48:14 [DEBUG] evaluate generation 1446: reward = -85.36, steps = 152\n",
      "09:48:15 [DEBUG] evaluate generation 1447: reward = -85.37, steps = 158\n",
      "09:48:17 [DEBUG] evaluate generation 1448: reward = -85.60, steps = 155\n",
      "09:48:19 [DEBUG] evaluate generation 1449: reward = -85.44, steps = 166\n",
      "09:48:20 [DEBUG] evaluate generation 1450: reward = -85.81, steps = 154\n",
      "09:48:22 [DEBUG] evaluate generation 1451: reward = -85.44, steps = 151\n",
      "09:48:25 [DEBUG] evaluate generation 1452: reward = -85.56, steps = 157\n",
      "09:48:27 [DEBUG] evaluate generation 1453: reward = -85.71, steps = 150\n",
      "09:48:29 [DEBUG] evaluate generation 1454: reward = -85.78, steps = 153\n",
      "09:48:31 [DEBUG] evaluate generation 1455: reward = -85.19, steps = 150\n",
      "09:48:33 [DEBUG] evaluate generation 1456: reward = -85.44, steps = 152\n",
      "09:48:35 [DEBUG] evaluate generation 1457: reward = -84.97, steps = 155\n",
      "09:48:36 [DEBUG] evaluate generation 1458: reward = -84.88, steps = 160\n",
      "09:48:39 [DEBUG] evaluate generation 1459: reward = -85.38, steps = 158\n",
      "09:48:41 [DEBUG] evaluate generation 1460: reward = -85.25, steps = 160\n",
      "09:48:43 [DEBUG] evaluate generation 1461: reward = -85.47, steps = 149\n",
      "09:48:45 [DEBUG] evaluate generation 1462: reward = -85.82, steps = 157\n",
      "09:48:47 [DEBUG] evaluate generation 1463: reward = -85.36, steps = 162\n",
      "09:48:48 [DEBUG] evaluate generation 1464: reward = -85.35, steps = 150\n",
      "09:48:50 [DEBUG] evaluate generation 1465: reward = -85.22, steps = 165\n",
      "09:48:52 [DEBUG] evaluate generation 1466: reward = -85.58, steps = 152\n",
      "09:48:53 [DEBUG] evaluate generation 1467: reward = -85.40, steps = 173\n",
      "09:48:57 [DEBUG] evaluate generation 1468: reward = -84.99, steps = 151\n",
      "09:48:58 [DEBUG] evaluate generation 1469: reward = -85.53, steps = 167\n",
      "09:49:01 [DEBUG] evaluate generation 1470: reward = -85.06, steps = 155\n",
      "09:49:04 [DEBUG] evaluate generation 1471: reward = -85.65, steps = 146\n",
      "09:49:06 [DEBUG] evaluate generation 1472: reward = -85.27, steps = 158\n",
      "09:49:07 [DEBUG] evaluate generation 1473: reward = -85.03, steps = 146\n",
      "09:49:08 [DEBUG] evaluate generation 1474: reward = -85.72, steps = 156\n",
      "09:49:10 [DEBUG] evaluate generation 1475: reward = -85.42, steps = 161\n",
      "09:49:13 [DEBUG] evaluate generation 1476: reward = -85.44, steps = 157\n",
      "09:49:15 [DEBUG] evaluate generation 1477: reward = -85.48, steps = 166\n",
      "09:49:18 [DEBUG] evaluate generation 1478: reward = -85.43, steps = 155\n",
      "09:49:20 [DEBUG] evaluate generation 1479: reward = -85.30, steps = 161\n",
      "09:49:24 [DEBUG] evaluate generation 1480: reward = -85.24, steps = 149\n",
      "09:49:27 [DEBUG] evaluate generation 1481: reward = -85.69, steps = 160\n",
      "09:49:30 [DEBUG] evaluate generation 1482: reward = -85.83, steps = 153\n",
      "09:49:34 [DEBUG] evaluate generation 1483: reward = -85.35, steps = 146\n",
      "09:49:35 [DEBUG] evaluate generation 1484: reward = -85.30, steps = 156\n",
      "09:49:38 [DEBUG] evaluate generation 1485: reward = -85.85, steps = 146\n",
      "09:49:40 [DEBUG] evaluate generation 1486: reward = -85.65, steps = 153\n",
      "09:49:43 [DEBUG] evaluate generation 1487: reward = -85.90, steps = 160\n",
      "09:49:44 [DEBUG] evaluate generation 1488: reward = -85.65, steps = 153\n",
      "09:49:46 [DEBUG] evaluate generation 1489: reward = -85.50, steps = 149\n",
      "09:49:48 [DEBUG] evaluate generation 1490: reward = -85.76, steps = 148\n",
      "09:49:49 [DEBUG] evaluate generation 1491: reward = -85.44, steps = 152\n",
      "09:49:51 [DEBUG] evaluate generation 1492: reward = -87.17, steps = 169\n",
      "09:49:52 [DEBUG] evaluate generation 1493: reward = -85.23, steps = 140\n",
      "09:49:55 [DEBUG] evaluate generation 1494: reward = -85.51, steps = 148\n",
      "09:49:56 [DEBUG] evaluate generation 1495: reward = -85.68, steps = 147\n",
      "09:49:59 [DEBUG] evaluate generation 1496: reward = -85.78, steps = 147\n",
      "09:50:01 [DEBUG] evaluate generation 1497: reward = -87.54, steps = 135\n",
      "09:50:04 [DEBUG] evaluate generation 1498: reward = -87.52, steps = 134\n",
      "09:50:05 [DEBUG] evaluate generation 1499: reward = -85.24, steps = 143\n",
      "09:50:07 [DEBUG] evaluate generation 1500: reward = -85.55, steps = 132\n",
      "09:50:08 [DEBUG] evaluate generation 1501: reward = -85.22, steps = 147\n",
      "09:50:09 [DEBUG] evaluate generation 1502: reward = -85.40, steps = 151\n",
      "09:50:11 [DEBUG] evaluate generation 1503: reward = -85.88, steps = 141\n",
      "09:50:12 [DEBUG] evaluate generation 1504: reward = -85.33, steps = 148\n",
      "09:50:14 [DEBUG] evaluate generation 1505: reward = -85.54, steps = 151\n",
      "09:50:16 [DEBUG] evaluate generation 1506: reward = -85.18, steps = 150\n",
      "09:50:18 [DEBUG] evaluate generation 1507: reward = -85.56, steps = 162\n",
      "09:50:21 [DEBUG] evaluate generation 1508: reward = -85.24, steps = 140\n",
      "09:50:23 [DEBUG] evaluate generation 1509: reward = -85.68, steps = 154\n",
      "09:50:25 [DEBUG] evaluate generation 1510: reward = -85.46, steps = 147\n",
      "09:50:29 [DEBUG] evaluate generation 1511: reward = -84.82, steps = 158\n",
      "09:50:32 [DEBUG] evaluate generation 1512: reward = -85.12, steps = 153\n",
      "09:50:33 [DEBUG] evaluate generation 1513: reward = -85.28, steps = 154\n",
      "09:50:34 [DEBUG] evaluate generation 1514: reward = -86.38, steps = 142\n",
      "09:50:36 [DEBUG] evaluate generation 1515: reward = -85.50, steps = 152\n",
      "09:50:39 [DEBUG] evaluate generation 1516: reward = -84.81, steps = 148\n",
      "09:50:40 [DEBUG] evaluate generation 1517: reward = -86.83, steps = 139\n",
      "09:50:41 [DEBUG] evaluate generation 1518: reward = -86.42, steps = 143\n",
      "09:50:43 [DEBUG] evaluate generation 1519: reward = -85.18, steps = 155\n",
      "09:50:44 [DEBUG] evaluate generation 1520: reward = -84.93, steps = 149\n",
      "09:50:46 [DEBUG] evaluate generation 1521: reward = -84.95, steps = 151\n",
      "09:50:48 [DEBUG] evaluate generation 1522: reward = -85.19, steps = 158\n",
      "09:50:50 [DEBUG] evaluate generation 1523: reward = -85.65, steps = 158\n",
      "09:50:52 [DEBUG] evaluate generation 1524: reward = -85.22, steps = 148\n",
      "09:50:53 [DEBUG] evaluate generation 1525: reward = -85.18, steps = 154\n",
      "09:50:56 [DEBUG] evaluate generation 1526: reward = -85.40, steps = 146\n",
      "09:50:58 [DEBUG] evaluate generation 1527: reward = -84.63, steps = 155\n",
      "09:50:59 [DEBUG] evaluate generation 1528: reward = -84.41, steps = 145\n",
      "09:51:00 [DEBUG] evaluate generation 1529: reward = -85.13, steps = 149\n",
      "09:51:04 [DEBUG] evaluate generation 1530: reward = -85.47, steps = 161\n",
      "09:51:06 [DEBUG] evaluate generation 1531: reward = -86.76, steps = 142\n",
      "09:51:10 [DEBUG] evaluate generation 1532: reward = -85.39, steps = 151\n",
      "09:51:11 [DEBUG] evaluate generation 1533: reward = -85.36, steps = 159\n",
      "09:51:13 [DEBUG] evaluate generation 1534: reward = -85.27, steps = 149\n",
      "09:51:15 [DEBUG] evaluate generation 1535: reward = -85.91, steps = 152\n",
      "09:51:17 [DEBUG] evaluate generation 1536: reward = -85.94, steps = 148\n",
      "09:51:18 [DEBUG] evaluate generation 1537: reward = -85.47, steps = 152\n",
      "09:51:20 [DEBUG] evaluate generation 1538: reward = -85.88, steps = 149\n",
      "09:51:22 [DEBUG] evaluate generation 1539: reward = -85.17, steps = 154\n",
      "09:51:24 [DEBUG] evaluate generation 1540: reward = -85.82, steps = 142\n",
      "09:51:25 [DEBUG] evaluate generation 1541: reward = -85.42, steps = 154\n",
      "09:51:28 [DEBUG] evaluate generation 1542: reward = -85.49, steps = 152\n",
      "09:51:30 [DEBUG] evaluate generation 1543: reward = -85.87, steps = 156\n",
      "09:51:31 [DEBUG] evaluate generation 1544: reward = -85.16, steps = 147\n",
      "09:51:32 [DEBUG] evaluate generation 1545: reward = -86.33, steps = 150\n",
      "09:51:34 [DEBUG] evaluate generation 1546: reward = -85.93, steps = 159\n",
      "09:51:35 [DEBUG] evaluate generation 1547: reward = -85.94, steps = 170\n",
      "09:51:38 [DEBUG] evaluate generation 1548: reward = -85.64, steps = 154\n",
      "09:51:40 [DEBUG] evaluate generation 1549: reward = -85.45, steps = 155\n",
      "09:51:42 [DEBUG] evaluate generation 1550: reward = -85.84, steps = 150\n",
      "09:51:43 [DEBUG] evaluate generation 1551: reward = -85.70, steps = 157\n",
      "09:51:46 [DEBUG] evaluate generation 1552: reward = -85.34, steps = 152\n",
      "09:51:48 [DEBUG] evaluate generation 1553: reward = -84.67, steps = 146\n",
      "09:51:51 [DEBUG] evaluate generation 1554: reward = -85.72, steps = 148\n",
      "09:51:53 [DEBUG] evaluate generation 1555: reward = -86.05, steps = 159\n",
      "09:51:55 [DEBUG] evaluate generation 1556: reward = -85.43, steps = 151\n",
      "09:51:57 [DEBUG] evaluate generation 1557: reward = -85.73, steps = 166\n",
      "09:51:59 [DEBUG] evaluate generation 1558: reward = -85.59, steps = 152\n",
      "09:52:03 [DEBUG] evaluate generation 1559: reward = -85.59, steps = 152\n",
      "09:52:05 [DEBUG] evaluate generation 1560: reward = -85.67, steps = 159\n",
      "09:52:08 [DEBUG] evaluate generation 1561: reward = -85.31, steps = 150\n",
      "09:52:10 [DEBUG] evaluate generation 1562: reward = -85.20, steps = 151\n",
      "09:52:12 [DEBUG] evaluate generation 1563: reward = -85.19, steps = 154\n",
      "09:52:15 [DEBUG] evaluate generation 1564: reward = -85.20, steps = 155\n",
      "09:52:18 [DEBUG] evaluate generation 1565: reward = -85.32, steps = 155\n",
      "09:52:20 [DEBUG] evaluate generation 1566: reward = -85.28, steps = 158\n",
      "09:52:21 [DEBUG] evaluate generation 1567: reward = -84.83, steps = 153\n",
      "09:52:22 [DEBUG] evaluate generation 1568: reward = -85.91, steps = 146\n",
      "09:52:24 [DEBUG] evaluate generation 1569: reward = -85.35, steps = 152\n",
      "09:52:25 [DEBUG] evaluate generation 1570: reward = -84.70, steps = 153\n",
      "09:52:27 [DEBUG] evaluate generation 1571: reward = -85.12, steps = 157\n",
      "09:52:28 [DEBUG] evaluate generation 1572: reward = -85.02, steps = 157\n",
      "09:52:30 [DEBUG] evaluate generation 1573: reward = -84.53, steps = 155\n",
      "09:52:32 [DEBUG] evaluate generation 1574: reward = -84.96, steps = 144\n",
      "09:52:34 [DEBUG] evaluate generation 1575: reward = -85.19, steps = 151\n",
      "09:52:37 [DEBUG] evaluate generation 1576: reward = -85.43, steps = 150\n",
      "09:52:41 [DEBUG] evaluate generation 1577: reward = -85.63, steps = 159\n",
      "09:52:43 [DEBUG] evaluate generation 1578: reward = -85.15, steps = 154\n",
      "09:52:45 [DEBUG] evaluate generation 1579: reward = -85.20, steps = 150\n",
      "09:52:46 [DEBUG] evaluate generation 1580: reward = -85.08, steps = 149\n",
      "09:52:49 [DEBUG] evaluate generation 1581: reward = -85.14, steps = 151\n",
      "09:52:50 [DEBUG] evaluate generation 1582: reward = -85.40, steps = 147\n",
      "09:52:51 [DEBUG] evaluate generation 1583: reward = -85.48, steps = 147\n",
      "09:52:54 [DEBUG] evaluate generation 1584: reward = -85.22, steps = 151\n",
      "09:52:55 [DEBUG] evaluate generation 1585: reward = -85.12, steps = 152\n",
      "09:52:58 [DEBUG] evaluate generation 1586: reward = -85.91, steps = 157\n",
      "09:53:00 [DEBUG] evaluate generation 1587: reward = -85.69, steps = 139\n",
      "09:53:01 [DEBUG] evaluate generation 1588: reward = -85.83, steps = 150\n",
      "09:53:04 [DEBUG] evaluate generation 1589: reward = -85.50, steps = 153\n",
      "09:53:07 [DEBUG] evaluate generation 1590: reward = -87.82, steps = 145\n",
      "09:53:10 [DEBUG] evaluate generation 1591: reward = -84.92, steps = 147\n",
      "09:53:11 [DEBUG] evaluate generation 1592: reward = -85.69, steps = 148\n",
      "09:53:13 [DEBUG] evaluate generation 1593: reward = -85.84, steps = 155\n",
      "09:53:15 [DEBUG] evaluate generation 1594: reward = -85.05, steps = 146\n",
      "09:53:16 [DEBUG] evaluate generation 1595: reward = -85.83, steps = 146\n",
      "09:53:18 [DEBUG] evaluate generation 1596: reward = -84.87, steps = 156\n",
      "09:53:22 [DEBUG] evaluate generation 1597: reward = -87.52, steps = 148\n",
      "09:53:23 [DEBUG] evaluate generation 1598: reward = -85.30, steps = 147\n",
      "09:53:25 [DEBUG] evaluate generation 1599: reward = -85.32, steps = 149\n",
      "09:53:26 [DEBUG] evaluate generation 1600: reward = -84.99, steps = 149\n",
      "09:53:28 [DEBUG] evaluate generation 1601: reward = -85.39, steps = 148\n",
      "09:53:29 [DEBUG] evaluate generation 1602: reward = -84.86, steps = 148\n",
      "09:53:33 [DEBUG] evaluate generation 1603: reward = -86.15, steps = 149\n",
      "09:53:35 [DEBUG] evaluate generation 1604: reward = -85.37, steps = 149\n",
      "09:53:36 [DEBUG] evaluate generation 1605: reward = -84.92, steps = 151\n",
      "09:53:38 [DEBUG] evaluate generation 1606: reward = -85.10, steps = 146\n",
      "09:53:40 [DEBUG] evaluate generation 1607: reward = -86.52, steps = 146\n",
      "09:53:41 [DEBUG] evaluate generation 1608: reward = -85.60, steps = 143\n",
      "09:53:43 [DEBUG] evaluate generation 1609: reward = -85.42, steps = 164\n",
      "09:53:46 [DEBUG] evaluate generation 1610: reward = -85.52, steps = 147\n",
      "09:53:47 [DEBUG] evaluate generation 1611: reward = -85.81, steps = 149\n",
      "09:53:48 [DEBUG] evaluate generation 1612: reward = -86.18, steps = 145\n",
      "09:53:50 [DEBUG] evaluate generation 1613: reward = -85.62, steps = 158\n",
      "09:53:52 [DEBUG] evaluate generation 1614: reward = -85.16, steps = 150\n",
      "09:53:53 [DEBUG] evaluate generation 1615: reward = -85.33, steps = 151\n",
      "09:53:56 [DEBUG] evaluate generation 1616: reward = -86.59, steps = 147\n",
      "09:53:58 [DEBUG] evaluate generation 1617: reward = -85.41, steps = 151\n",
      "09:54:00 [DEBUG] evaluate generation 1618: reward = -85.39, steps = 156\n",
      "09:54:02 [DEBUG] evaluate generation 1619: reward = -87.85, steps = 144\n",
      "09:54:05 [DEBUG] evaluate generation 1620: reward = -85.47, steps = 151\n",
      "09:54:06 [DEBUG] evaluate generation 1621: reward = -85.60, steps = 158\n",
      "09:54:07 [DEBUG] evaluate generation 1622: reward = -85.58, steps = 160\n",
      "09:54:10 [DEBUG] evaluate generation 1623: reward = -86.02, steps = 155\n",
      "09:54:12 [DEBUG] evaluate generation 1624: reward = -85.69, steps = 153\n",
      "09:54:15 [DEBUG] evaluate generation 1625: reward = -87.60, steps = 150\n",
      "09:54:17 [DEBUG] evaluate generation 1626: reward = -86.27, steps = 160\n",
      "09:54:19 [DEBUG] evaluate generation 1627: reward = -86.90, steps = 144\n",
      "09:54:21 [DEBUG] evaluate generation 1628: reward = -85.81, steps = 150\n",
      "09:54:23 [DEBUG] evaluate generation 1629: reward = -85.24, steps = 148\n",
      "09:54:24 [DEBUG] evaluate generation 1630: reward = -85.57, steps = 151\n",
      "09:54:26 [DEBUG] evaluate generation 1631: reward = -85.90, steps = 152\n",
      "09:54:29 [DEBUG] evaluate generation 1632: reward = -85.03, steps = 151\n",
      "09:54:31 [DEBUG] evaluate generation 1633: reward = -85.80, steps = 154\n",
      "09:54:33 [DEBUG] evaluate generation 1634: reward = -85.71, steps = 157\n",
      "09:54:34 [DEBUG] evaluate generation 1635: reward = -85.37, steps = 151\n",
      "09:54:36 [DEBUG] evaluate generation 1636: reward = -85.31, steps = 152\n",
      "09:54:37 [DEBUG] evaluate generation 1637: reward = -86.02, steps = 150\n",
      "09:54:38 [DEBUG] evaluate generation 1638: reward = -85.50, steps = 156\n",
      "09:54:40 [DEBUG] evaluate generation 1639: reward = -85.83, steps = 149\n",
      "09:54:42 [DEBUG] evaluate generation 1640: reward = -85.32, steps = 151\n",
      "09:54:44 [DEBUG] evaluate generation 1641: reward = -85.80, steps = 166\n",
      "09:54:45 [DEBUG] evaluate generation 1642: reward = -85.69, steps = 145\n",
      "09:54:47 [DEBUG] evaluate generation 1643: reward = -85.44, steps = 150\n",
      "09:54:50 [DEBUG] evaluate generation 1644: reward = -85.48, steps = 155\n",
      "09:54:52 [DEBUG] evaluate generation 1645: reward = -85.44, steps = 152\n",
      "09:54:54 [DEBUG] evaluate generation 1646: reward = -85.44, steps = 149\n",
      "09:54:57 [DEBUG] evaluate generation 1647: reward = -85.63, steps = 150\n",
      "09:54:58 [DEBUG] evaluate generation 1648: reward = -85.69, steps = 157\n",
      "09:54:59 [DEBUG] evaluate generation 1649: reward = -85.23, steps = 155\n",
      "09:55:01 [DEBUG] evaluate generation 1650: reward = -85.17, steps = 152\n",
      "09:55:03 [DEBUG] evaluate generation 1651: reward = -85.76, steps = 148\n",
      "09:55:05 [DEBUG] evaluate generation 1652: reward = -85.19, steps = 153\n",
      "09:55:06 [DEBUG] evaluate generation 1653: reward = -85.43, steps = 157\n",
      "09:55:08 [DEBUG] evaluate generation 1654: reward = -85.24, steps = 159\n",
      "09:55:09 [DEBUG] evaluate generation 1655: reward = -85.60, steps = 151\n",
      "09:55:11 [DEBUG] evaluate generation 1656: reward = -85.23, steps = 153\n",
      "09:55:12 [DEBUG] evaluate generation 1657: reward = -85.48, steps = 154\n",
      "09:55:15 [DEBUG] evaluate generation 1658: reward = -85.35, steps = 154\n",
      "09:55:17 [DEBUG] evaluate generation 1659: reward = -85.85, steps = 151\n",
      "09:55:19 [DEBUG] evaluate generation 1660: reward = -85.05, steps = 155\n",
      "09:55:20 [DEBUG] evaluate generation 1661: reward = -85.40, steps = 150\n",
      "09:55:23 [DEBUG] evaluate generation 1662: reward = -85.75, steps = 152\n",
      "09:55:26 [DEBUG] evaluate generation 1663: reward = -86.06, steps = 156\n",
      "09:55:28 [DEBUG] evaluate generation 1664: reward = -85.61, steps = 158\n",
      "09:55:32 [DEBUG] evaluate generation 1665: reward = -85.29, steps = 152\n",
      "09:55:34 [DEBUG] evaluate generation 1666: reward = -85.28, steps = 149\n",
      "09:55:35 [DEBUG] evaluate generation 1667: reward = -85.59, steps = 154\n",
      "09:55:37 [DEBUG] evaluate generation 1668: reward = -85.58, steps = 154\n",
      "09:55:39 [DEBUG] evaluate generation 1669: reward = -85.59, steps = 155\n",
      "09:55:40 [DEBUG] evaluate generation 1670: reward = -85.88, steps = 166\n",
      "09:55:43 [DEBUG] evaluate generation 1671: reward = -85.99, steps = 168\n",
      "09:55:45 [DEBUG] evaluate generation 1672: reward = -85.86, steps = 156\n",
      "09:55:46 [DEBUG] evaluate generation 1673: reward = -86.06, steps = 162\n",
      "09:55:47 [DEBUG] evaluate generation 1674: reward = -85.43, steps = 151\n",
      "09:55:50 [DEBUG] evaluate generation 1675: reward = -86.05, steps = 150\n",
      "09:55:51 [DEBUG] evaluate generation 1676: reward = -85.67, steps = 151\n",
      "09:55:52 [DEBUG] evaluate generation 1677: reward = -85.40, steps = 155\n",
      "09:55:54 [DEBUG] evaluate generation 1678: reward = -85.46, steps = 155\n",
      "09:55:55 [DEBUG] evaluate generation 1679: reward = -85.19, steps = 153\n",
      "09:55:57 [DEBUG] evaluate generation 1680: reward = -85.44, steps = 158\n",
      "09:55:58 [DEBUG] evaluate generation 1681: reward = -85.50, steps = 153\n",
      "09:56:00 [DEBUG] evaluate generation 1682: reward = -85.74, steps = 155\n",
      "09:56:01 [DEBUG] evaluate generation 1683: reward = -86.00, steps = 160\n",
      "09:56:04 [DEBUG] evaluate generation 1684: reward = -86.50, steps = 167\n",
      "09:56:05 [DEBUG] evaluate generation 1685: reward = -85.62, steps = 153\n",
      "09:56:07 [DEBUG] evaluate generation 1686: reward = -85.59, steps = 160\n",
      "09:56:08 [DEBUG] evaluate generation 1687: reward = -85.37, steps = 163\n",
      "09:56:10 [DEBUG] evaluate generation 1688: reward = -85.27, steps = 155\n",
      "09:56:12 [DEBUG] evaluate generation 1689: reward = -25.37, steps = 1600\n",
      "09:56:13 [DEBUG] evaluate generation 1690: reward = -85.76, steps = 158\n",
      "09:56:16 [DEBUG] evaluate generation 1691: reward = -85.37, steps = 159\n",
      "09:56:19 [DEBUG] evaluate generation 1692: reward = -86.06, steps = 164\n",
      "09:56:22 [DEBUG] evaluate generation 1693: reward = -85.69, steps = 153\n",
      "09:56:23 [DEBUG] evaluate generation 1694: reward = -86.17, steps = 160\n",
      "09:56:25 [DEBUG] evaluate generation 1695: reward = -85.49, steps = 158\n",
      "09:56:28 [DEBUG] evaluate generation 1696: reward = -85.85, steps = 153\n",
      "09:56:30 [DEBUG] evaluate generation 1697: reward = -85.01, steps = 164\n",
      "09:56:33 [DEBUG] evaluate generation 1698: reward = -86.48, steps = 159\n",
      "09:56:34 [DEBUG] evaluate generation 1699: reward = -85.49, steps = 165\n",
      "09:56:38 [DEBUG] evaluate generation 1700: reward = -85.69, steps = 160\n",
      "09:56:41 [DEBUG] evaluate generation 1701: reward = -85.53, steps = 159\n",
      "09:56:44 [DEBUG] evaluate generation 1702: reward = -86.02, steps = 150\n",
      "09:56:46 [DEBUG] evaluate generation 1703: reward = -85.79, steps = 151\n",
      "09:56:47 [DEBUG] evaluate generation 1704: reward = -86.29, steps = 150\n",
      "09:56:50 [DEBUG] evaluate generation 1705: reward = -85.99, steps = 152\n",
      "09:56:53 [DEBUG] evaluate generation 1706: reward = -85.36, steps = 153\n",
      "09:56:54 [DEBUG] evaluate generation 1707: reward = -85.65, steps = 148\n",
      "09:56:55 [DEBUG] evaluate generation 1708: reward = -86.41, steps = 155\n",
      "09:56:59 [DEBUG] evaluate generation 1709: reward = -85.76, steps = 147\n",
      "09:57:00 [DEBUG] evaluate generation 1710: reward = -85.72, steps = 152\n",
      "09:57:02 [DEBUG] evaluate generation 1711: reward = -85.89, steps = 160\n",
      "09:57:04 [DEBUG] evaluate generation 1712: reward = -85.60, steps = 161\n",
      "09:57:06 [DEBUG] evaluate generation 1713: reward = -85.54, steps = 156\n",
      "09:57:08 [DEBUG] evaluate generation 1714: reward = -85.55, steps = 157\n",
      "09:57:11 [DEBUG] evaluate generation 1715: reward = -85.63, steps = 160\n",
      "09:57:14 [DEBUG] evaluate generation 1716: reward = -85.73, steps = 155\n",
      "09:57:16 [DEBUG] evaluate generation 1717: reward = -85.36, steps = 155\n",
      "09:57:17 [DEBUG] evaluate generation 1718: reward = -85.85, steps = 167\n",
      "09:57:19 [DEBUG] evaluate generation 1719: reward = -85.58, steps = 153\n",
      "09:57:21 [DEBUG] evaluate generation 1720: reward = -85.59, steps = 153\n",
      "09:57:22 [DEBUG] evaluate generation 1721: reward = -86.50, steps = 154\n",
      "09:57:24 [DEBUG] evaluate generation 1722: reward = -85.73, steps = 155\n",
      "09:57:25 [DEBUG] evaluate generation 1723: reward = -85.74, steps = 151\n",
      "09:57:27 [DEBUG] evaluate generation 1724: reward = -85.49, steps = 152\n",
      "09:57:29 [DEBUG] evaluate generation 1725: reward = -85.93, steps = 163\n",
      "09:57:31 [DEBUG] evaluate generation 1726: reward = -85.87, steps = 154\n",
      "09:57:32 [DEBUG] evaluate generation 1727: reward = -86.16, steps = 158\n",
      "09:57:35 [DEBUG] evaluate generation 1728: reward = -85.94, steps = 163\n",
      "09:57:36 [DEBUG] evaluate generation 1729: reward = -85.81, steps = 157\n",
      "09:57:38 [DEBUG] evaluate generation 1730: reward = -85.89, steps = 157\n",
      "09:57:40 [DEBUG] evaluate generation 1731: reward = -85.51, steps = 155\n",
      "09:57:43 [DEBUG] evaluate generation 1732: reward = -85.99, steps = 158\n",
      "09:57:45 [DEBUG] evaluate generation 1733: reward = -86.28, steps = 166\n",
      "09:57:48 [DEBUG] evaluate generation 1734: reward = -85.63, steps = 151\n",
      "09:57:50 [DEBUG] evaluate generation 1735: reward = -85.97, steps = 152\n",
      "09:57:51 [DEBUG] evaluate generation 1736: reward = -85.89, steps = 147\n",
      "09:57:54 [DEBUG] evaluate generation 1737: reward = -85.57, steps = 144\n",
      "09:57:55 [DEBUG] evaluate generation 1738: reward = -86.14, steps = 157\n",
      "09:57:58 [DEBUG] evaluate generation 1739: reward = -86.05, steps = 159\n",
      "09:58:01 [DEBUG] evaluate generation 1740: reward = -86.28, steps = 152\n",
      "09:58:02 [DEBUG] evaluate generation 1741: reward = -85.87, steps = 152\n",
      "09:58:04 [DEBUG] evaluate generation 1742: reward = -85.75, steps = 156\n",
      "09:58:06 [DEBUG] evaluate generation 1743: reward = -86.03, steps = 147\n",
      "09:58:07 [DEBUG] evaluate generation 1744: reward = -85.43, steps = 153\n",
      "09:58:09 [DEBUG] evaluate generation 1745: reward = -85.31, steps = 155\n",
      "09:58:11 [DEBUG] evaluate generation 1746: reward = -86.20, steps = 150\n",
      "09:58:14 [DEBUG] evaluate generation 1747: reward = -85.55, steps = 152\n",
      "09:58:15 [DEBUG] evaluate generation 1748: reward = -86.02, steps = 155\n",
      "09:58:19 [DEBUG] evaluate generation 1749: reward = -85.55, steps = 152\n",
      "09:58:22 [DEBUG] evaluate generation 1750: reward = -85.41, steps = 153\n",
      "09:58:23 [DEBUG] evaluate generation 1751: reward = -85.87, steps = 149\n",
      "09:58:25 [DEBUG] evaluate generation 1752: reward = -85.21, steps = 158\n",
      "09:58:27 [DEBUG] evaluate generation 1753: reward = -85.82, steps = 167\n",
      "09:58:28 [DEBUG] evaluate generation 1754: reward = -86.58, steps = 147\n",
      "09:58:30 [DEBUG] evaluate generation 1755: reward = -85.71, steps = 153\n",
      "09:58:32 [DEBUG] evaluate generation 1756: reward = -85.62, steps = 156\n",
      "09:58:34 [DEBUG] evaluate generation 1757: reward = -85.60, steps = 154\n",
      "09:58:36 [DEBUG] evaluate generation 1758: reward = -85.61, steps = 165\n",
      "09:58:37 [DEBUG] evaluate generation 1759: reward = -85.70, steps = 155\n",
      "09:58:39 [DEBUG] evaluate generation 1760: reward = -85.63, steps = 158\n",
      "09:58:41 [DEBUG] evaluate generation 1761: reward = -85.83, steps = 161\n",
      "09:58:42 [DEBUG] evaluate generation 1762: reward = -85.26, steps = 153\n",
      "09:58:44 [DEBUG] evaluate generation 1763: reward = -85.61, steps = 150\n",
      "09:58:46 [DEBUG] evaluate generation 1764: reward = -86.13, steps = 151\n",
      "09:58:48 [DEBUG] evaluate generation 1765: reward = -85.75, steps = 150\n",
      "09:58:49 [DEBUG] evaluate generation 1766: reward = -85.49, steps = 156\n",
      "09:58:51 [DEBUG] evaluate generation 1767: reward = -85.48, steps = 156\n",
      "09:58:52 [DEBUG] evaluate generation 1768: reward = -85.96, steps = 160\n",
      "09:58:54 [DEBUG] evaluate generation 1769: reward = -85.82, steps = 161\n",
      "09:58:58 [DEBUG] evaluate generation 1770: reward = -85.92, steps = 151\n",
      "09:59:00 [DEBUG] evaluate generation 1771: reward = -85.17, steps = 149\n",
      "09:59:03 [DEBUG] evaluate generation 1772: reward = -85.76, steps = 162\n",
      "09:59:06 [DEBUG] evaluate generation 1773: reward = -84.93, steps = 154\n",
      "09:59:07 [DEBUG] evaluate generation 1774: reward = -85.78, steps = 149\n",
      "09:59:09 [DEBUG] evaluate generation 1775: reward = -85.94, steps = 158\n",
      "09:59:10 [DEBUG] evaluate generation 1776: reward = -85.70, steps = 153\n",
      "09:59:12 [DEBUG] evaluate generation 1777: reward = -85.34, steps = 153\n",
      "09:59:14 [DEBUG] evaluate generation 1778: reward = -85.49, steps = 152\n",
      "09:59:15 [DEBUG] evaluate generation 1779: reward = -86.27, steps = 151\n",
      "09:59:16 [DEBUG] evaluate generation 1780: reward = -85.41, steps = 150\n",
      "09:59:19 [DEBUG] evaluate generation 1781: reward = -85.80, steps = 153\n",
      "09:59:20 [DEBUG] evaluate generation 1782: reward = -85.75, steps = 153\n",
      "09:59:21 [DEBUG] evaluate generation 1783: reward = -85.79, steps = 147\n",
      "09:59:24 [DEBUG] evaluate generation 1784: reward = -85.91, steps = 145\n",
      "09:59:26 [DEBUG] evaluate generation 1785: reward = -85.43, steps = 155\n",
      "09:59:28 [DEBUG] evaluate generation 1786: reward = -85.50, steps = 153\n",
      "09:59:31 [DEBUG] evaluate generation 1787: reward = -85.64, steps = 149\n",
      "09:59:32 [DEBUG] evaluate generation 1788: reward = -85.71, steps = 158\n",
      "09:59:34 [DEBUG] evaluate generation 1789: reward = -85.43, steps = 149\n",
      "09:59:35 [DEBUG] evaluate generation 1790: reward = -85.46, steps = 156\n",
      "09:59:37 [DEBUG] evaluate generation 1791: reward = -85.90, steps = 152\n",
      "09:59:38 [DEBUG] evaluate generation 1792: reward = -85.82, steps = 146\n",
      "09:59:41 [DEBUG] evaluate generation 1793: reward = -86.45, steps = 147\n",
      "09:59:44 [DEBUG] evaluate generation 1794: reward = -85.86, steps = 145\n",
      "09:59:46 [DEBUG] evaluate generation 1795: reward = -85.26, steps = 144\n",
      "09:59:48 [DEBUG] evaluate generation 1796: reward = -85.71, steps = 149\n",
      "09:59:51 [DEBUG] evaluate generation 1797: reward = -85.61, steps = 144\n",
      "09:59:54 [DEBUG] evaluate generation 1798: reward = -85.91, steps = 148\n",
      "09:59:56 [DEBUG] evaluate generation 1799: reward = -85.58, steps = 152\n",
      "09:59:57 [DEBUG] evaluate generation 1800: reward = -86.35, steps = 155\n",
      "09:59:58 [DEBUG] evaluate generation 1801: reward = -85.66, steps = 156\n",
      "10:00:00 [DEBUG] evaluate generation 1802: reward = -85.13, steps = 149\n",
      "10:00:01 [DEBUG] evaluate generation 1803: reward = -86.27, steps = 154\n",
      "10:00:03 [DEBUG] evaluate generation 1804: reward = -86.10, steps = 145\n",
      "10:00:04 [DEBUG] evaluate generation 1805: reward = -85.81, steps = 151\n",
      "10:00:06 [DEBUG] evaluate generation 1806: reward = -85.71, steps = 147\n",
      "10:00:07 [DEBUG] evaluate generation 1807: reward = -85.89, steps = 151\n",
      "10:00:09 [DEBUG] evaluate generation 1808: reward = -86.85, steps = 150\n",
      "10:00:12 [DEBUG] evaluate generation 1809: reward = -85.32, steps = 150\n",
      "10:00:13 [DEBUG] evaluate generation 1810: reward = -86.09, steps = 148\n",
      "10:00:15 [DEBUG] evaluate generation 1811: reward = -85.78, steps = 149\n",
      "10:00:19 [DEBUG] evaluate generation 1812: reward = -85.83, steps = 151\n",
      "10:00:21 [DEBUG] evaluate generation 1813: reward = -86.19, steps = 148\n",
      "10:00:23 [DEBUG] evaluate generation 1814: reward = -85.73, steps = 148\n",
      "10:00:24 [DEBUG] evaluate generation 1815: reward = -85.77, steps = 153\n",
      "10:00:25 [DEBUG] evaluate generation 1816: reward = -86.11, steps = 148\n",
      "10:00:28 [DEBUG] evaluate generation 1817: reward = -86.03, steps = 147\n",
      "10:00:30 [DEBUG] evaluate generation 1818: reward = -86.69, steps = 150\n",
      "10:00:33 [DEBUG] evaluate generation 1819: reward = -86.08, steps = 151\n",
      "10:00:35 [DEBUG] evaluate generation 1820: reward = -85.81, steps = 151\n",
      "10:00:36 [DEBUG] evaluate generation 1821: reward = -85.48, steps = 150\n",
      "10:00:38 [DEBUG] evaluate generation 1822: reward = -86.52, steps = 145\n",
      "10:00:39 [DEBUG] evaluate generation 1823: reward = -85.75, steps = 159\n",
      "10:00:42 [DEBUG] evaluate generation 1824: reward = -86.00, steps = 147\n",
      "10:00:43 [DEBUG] evaluate generation 1825: reward = -85.65, steps = 152\n",
      "10:00:45 [DEBUG] evaluate generation 1826: reward = -85.90, steps = 154\n",
      "10:00:47 [DEBUG] evaluate generation 1827: reward = -86.68, steps = 149\n",
      "10:00:51 [DEBUG] evaluate generation 1828: reward = -86.91, steps = 145\n",
      "10:00:54 [DEBUG] evaluate generation 1829: reward = -86.55, steps = 152\n",
      "10:00:55 [DEBUG] evaluate generation 1830: reward = -85.59, steps = 150\n",
      "10:00:57 [DEBUG] evaluate generation 1831: reward = -86.58, steps = 149\n",
      "10:01:00 [DEBUG] evaluate generation 1832: reward = -85.94, steps = 153\n",
      "10:01:01 [DEBUG] evaluate generation 1833: reward = -85.49, steps = 154\n",
      "10:01:05 [DEBUG] evaluate generation 1834: reward = -85.40, steps = 159\n",
      "10:01:08 [DEBUG] evaluate generation 1835: reward = -85.28, steps = 145\n",
      "10:01:10 [DEBUG] evaluate generation 1836: reward = -86.16, steps = 149\n",
      "10:01:11 [DEBUG] evaluate generation 1837: reward = -85.54, steps = 150\n",
      "10:01:13 [DEBUG] evaluate generation 1838: reward = -85.06, steps = 157\n",
      "10:01:15 [DEBUG] evaluate generation 1839: reward = -86.91, steps = 153\n",
      "10:01:18 [DEBUG] evaluate generation 1840: reward = -85.70, steps = 148\n",
      "10:01:20 [DEBUG] evaluate generation 1841: reward = -86.44, steps = 148\n",
      "10:01:21 [DEBUG] evaluate generation 1842: reward = -86.80, steps = 147\n",
      "10:01:23 [DEBUG] evaluate generation 1843: reward = -85.55, steps = 153\n",
      "10:01:24 [DEBUG] evaluate generation 1844: reward = -85.73, steps = 158\n",
      "10:01:26 [DEBUG] evaluate generation 1845: reward = -86.26, steps = 154\n",
      "10:01:28 [DEBUG] evaluate generation 1846: reward = -86.44, steps = 150\n",
      "10:01:29 [DEBUG] evaluate generation 1847: reward = -86.61, steps = 151\n",
      "10:01:31 [DEBUG] evaluate generation 1848: reward = -86.91, steps = 150\n",
      "10:01:32 [DEBUG] evaluate generation 1849: reward = -85.70, steps = 148\n",
      "10:01:35 [DEBUG] evaluate generation 1850: reward = -85.56, steps = 151\n",
      "10:01:36 [DEBUG] evaluate generation 1851: reward = -87.26, steps = 153\n",
      "10:01:38 [DEBUG] evaluate generation 1852: reward = -86.42, steps = 148\n",
      "10:01:39 [DEBUG] evaluate generation 1853: reward = -86.71, steps = 147\n",
      "10:01:43 [DEBUG] evaluate generation 1854: reward = -87.37, steps = 142\n",
      "10:01:46 [DEBUG] evaluate generation 1855: reward = -87.31, steps = 147\n",
      "10:01:48 [DEBUG] evaluate generation 1856: reward = -84.78, steps = 147\n",
      "10:01:50 [DEBUG] evaluate generation 1857: reward = -84.88, steps = 146\n",
      "10:01:52 [DEBUG] evaluate generation 1858: reward = -85.38, steps = 150\n",
      "10:01:54 [DEBUG] evaluate generation 1859: reward = -86.93, steps = 141\n",
      "10:01:55 [DEBUG] evaluate generation 1860: reward = -85.61, steps = 141\n",
      "10:01:57 [DEBUG] evaluate generation 1861: reward = -85.46, steps = 151\n",
      "10:01:58 [DEBUG] evaluate generation 1862: reward = -86.45, steps = 148\n",
      "10:02:01 [DEBUG] evaluate generation 1863: reward = -86.67, steps = 146\n",
      "10:02:04 [DEBUG] evaluate generation 1864: reward = -86.93, steps = 149\n",
      "10:02:05 [DEBUG] evaluate generation 1865: reward = -85.02, steps = 150\n",
      "10:02:07 [DEBUG] evaluate generation 1866: reward = -85.46, steps = 148\n",
      "10:02:08 [DEBUG] evaluate generation 1867: reward = -86.39, steps = 150\n",
      "10:02:11 [DEBUG] evaluate generation 1868: reward = -87.00, steps = 144\n",
      "10:02:13 [DEBUG] evaluate generation 1869: reward = -85.51, steps = 148\n",
      "10:02:15 [DEBUG] evaluate generation 1870: reward = -87.01, steps = 146\n",
      "10:02:19 [DEBUG] evaluate generation 1871: reward = -87.24, steps = 154\n",
      "10:02:22 [DEBUG] evaluate generation 1872: reward = -86.23, steps = 142\n",
      "10:02:24 [DEBUG] evaluate generation 1873: reward = -85.18, steps = 154\n",
      "10:02:25 [DEBUG] evaluate generation 1874: reward = -85.13, steps = 145\n",
      "10:02:27 [DEBUG] evaluate generation 1875: reward = -85.26, steps = 148\n",
      "10:02:28 [DEBUG] evaluate generation 1876: reward = -84.76, steps = 148\n",
      "10:02:31 [DEBUG] evaluate generation 1877: reward = -86.18, steps = 147\n",
      "10:02:32 [DEBUG] evaluate generation 1878: reward = -86.79, steps = 150\n",
      "10:02:34 [DEBUG] evaluate generation 1879: reward = -86.26, steps = 152\n",
      "10:02:36 [DEBUG] evaluate generation 1880: reward = -85.05, steps = 146\n",
      "10:02:37 [DEBUG] evaluate generation 1881: reward = -86.71, steps = 149\n",
      "10:02:39 [DEBUG] evaluate generation 1882: reward = -85.89, steps = 154\n",
      "10:02:40 [DEBUG] evaluate generation 1883: reward = -86.74, steps = 148\n",
      "10:02:43 [DEBUG] evaluate generation 1884: reward = -86.40, steps = 151\n",
      "10:02:45 [DEBUG] evaluate generation 1885: reward = -85.68, steps = 152\n",
      "10:02:48 [DEBUG] evaluate generation 1886: reward = -86.58, steps = 147\n",
      "10:02:49 [DEBUG] evaluate generation 1887: reward = -86.69, steps = 170\n",
      "10:02:52 [DEBUG] evaluate generation 1888: reward = -86.54, steps = 147\n",
      "10:02:54 [DEBUG] evaluate generation 1889: reward = -85.63, steps = 152\n",
      "10:02:55 [DEBUG] evaluate generation 1890: reward = -85.51, steps = 150\n",
      "10:02:57 [DEBUG] evaluate generation 1891: reward = -85.28, steps = 158\n",
      "10:02:59 [DEBUG] evaluate generation 1892: reward = -85.93, steps = 149\n",
      "10:03:01 [DEBUG] evaluate generation 1893: reward = -84.82, steps = 153\n",
      "10:03:02 [DEBUG] evaluate generation 1894: reward = -87.54, steps = 151\n",
      "10:03:05 [DEBUG] evaluate generation 1895: reward = -87.06, steps = 155\n",
      "10:03:06 [DEBUG] evaluate generation 1896: reward = -86.34, steps = 163\n",
      "10:03:07 [DEBUG] evaluate generation 1897: reward = -85.97, steps = 157\n",
      "10:03:10 [DEBUG] evaluate generation 1898: reward = -85.29, steps = 149\n",
      "10:03:12 [DEBUG] evaluate generation 1899: reward = -85.31, steps = 146\n",
      "10:03:14 [DEBUG] evaluate generation 1900: reward = -85.65, steps = 156\n",
      "10:03:16 [DEBUG] evaluate generation 1901: reward = -85.03, steps = 153\n",
      "10:03:19 [DEBUG] evaluate generation 1902: reward = -84.87, steps = 158\n",
      "10:03:21 [DEBUG] evaluate generation 1903: reward = -85.41, steps = 154\n",
      "10:03:22 [DEBUG] evaluate generation 1904: reward = -85.06, steps = 150\n",
      "10:03:24 [DEBUG] evaluate generation 1905: reward = -84.99, steps = 159\n",
      "10:03:26 [DEBUG] evaluate generation 1906: reward = -85.83, steps = 151\n",
      "10:03:27 [DEBUG] evaluate generation 1907: reward = -85.36, steps = 152\n",
      "10:03:29 [DEBUG] evaluate generation 1908: reward = -85.02, steps = 158\n",
      "10:03:31 [DEBUG] evaluate generation 1909: reward = -85.12, steps = 150\n",
      "10:03:34 [DEBUG] evaluate generation 1910: reward = -85.60, steps = 146\n",
      "10:03:35 [DEBUG] evaluate generation 1911: reward = -85.29, steps = 151\n",
      "10:03:39 [DEBUG] evaluate generation 1912: reward = -85.35, steps = 154\n",
      "10:03:43 [DEBUG] evaluate generation 1913: reward = -85.88, steps = 151\n",
      "10:03:46 [DEBUG] evaluate generation 1914: reward = -85.34, steps = 153\n",
      "10:03:48 [DEBUG] evaluate generation 1915: reward = -85.22, steps = 150\n",
      "10:03:50 [DEBUG] evaluate generation 1916: reward = -86.85, steps = 146\n",
      "10:03:52 [DEBUG] evaluate generation 1917: reward = -86.30, steps = 155\n",
      "10:03:54 [DEBUG] evaluate generation 1918: reward = -86.57, steps = 154\n",
      "10:03:56 [DEBUG] evaluate generation 1919: reward = -85.51, steps = 153\n",
      "10:03:57 [DEBUG] evaluate generation 1920: reward = -85.71, steps = 153\n",
      "10:04:00 [DEBUG] evaluate generation 1921: reward = -84.57, steps = 149\n",
      "10:04:01 [DEBUG] evaluate generation 1922: reward = -85.36, steps = 151\n",
      "10:04:03 [DEBUG] evaluate generation 1923: reward = -85.34, steps = 150\n",
      "10:04:07 [DEBUG] evaluate generation 1924: reward = -86.27, steps = 147\n",
      "10:04:10 [DEBUG] evaluate generation 1925: reward = -85.28, steps = 153\n",
      "10:04:11 [DEBUG] evaluate generation 1926: reward = -86.28, steps = 150\n",
      "10:04:12 [DEBUG] evaluate generation 1927: reward = -85.63, steps = 151\n",
      "10:04:15 [DEBUG] evaluate generation 1928: reward = -85.32, steps = 151\n",
      "10:04:16 [DEBUG] evaluate generation 1929: reward = -85.73, steps = 148\n",
      "10:04:19 [DEBUG] evaluate generation 1930: reward = -85.20, steps = 151\n",
      "10:04:21 [DEBUG] evaluate generation 1931: reward = -85.72, steps = 145\n",
      "10:04:24 [DEBUG] evaluate generation 1932: reward = -86.81, steps = 152\n",
      "10:04:26 [DEBUG] evaluate generation 1933: reward = -85.02, steps = 157\n",
      "10:04:28 [DEBUG] evaluate generation 1934: reward = -87.11, steps = 150\n",
      "10:04:30 [DEBUG] evaluate generation 1935: reward = -85.90, steps = 159\n",
      "10:04:32 [DEBUG] evaluate generation 1936: reward = -86.04, steps = 155\n",
      "10:04:34 [DEBUG] evaluate generation 1937: reward = -85.78, steps = 152\n",
      "10:04:36 [DEBUG] evaluate generation 1938: reward = -86.82, steps = 144\n",
      "10:04:38 [DEBUG] evaluate generation 1939: reward = -85.95, steps = 155\n",
      "10:04:39 [DEBUG] evaluate generation 1940: reward = -86.54, steps = 141\n",
      "10:04:41 [DEBUG] evaluate generation 1941: reward = -85.90, steps = 161\n",
      "10:04:42 [DEBUG] evaluate generation 1942: reward = -85.29, steps = 149\n",
      "10:04:44 [DEBUG] evaluate generation 1943: reward = -85.62, steps = 152\n",
      "10:04:45 [DEBUG] evaluate generation 1944: reward = -86.34, steps = 147\n",
      "10:04:49 [DEBUG] evaluate generation 1945: reward = -86.12, steps = 145\n",
      "10:04:50 [DEBUG] evaluate generation 1946: reward = -85.56, steps = 152\n",
      "10:04:52 [DEBUG] evaluate generation 1947: reward = -84.90, steps = 159\n",
      "10:04:54 [DEBUG] evaluate generation 1948: reward = -85.41, steps = 152\n",
      "10:04:56 [DEBUG] evaluate generation 1949: reward = -85.22, steps = 152\n",
      "10:05:00 [DEBUG] evaluate generation 1950: reward = -85.98, steps = 144\n",
      "10:05:02 [DEBUG] evaluate generation 1951: reward = -87.10, steps = 146\n",
      "10:05:04 [DEBUG] evaluate generation 1952: reward = -85.09, steps = 143\n",
      "10:05:06 [DEBUG] evaluate generation 1953: reward = -85.18, steps = 150\n",
      "10:05:08 [DEBUG] evaluate generation 1954: reward = -85.34, steps = 146\n",
      "10:05:10 [DEBUG] evaluate generation 1955: reward = -85.29, steps = 151\n",
      "10:05:12 [DEBUG] evaluate generation 1956: reward = -85.29, steps = 146\n",
      "10:05:14 [DEBUG] evaluate generation 1957: reward = -85.88, steps = 148\n",
      "10:05:17 [DEBUG] evaluate generation 1958: reward = -86.37, steps = 142\n",
      "10:05:19 [DEBUG] evaluate generation 1959: reward = -87.83, steps = 145\n",
      "10:05:20 [DEBUG] evaluate generation 1960: reward = -85.35, steps = 148\n",
      "10:05:22 [DEBUG] evaluate generation 1961: reward = -85.24, steps = 149\n",
      "10:05:25 [DEBUG] evaluate generation 1962: reward = -86.36, steps = 146\n",
      "10:05:26 [DEBUG] evaluate generation 1963: reward = -85.04, steps = 151\n",
      "10:05:28 [DEBUG] evaluate generation 1964: reward = -85.76, steps = 148\n",
      "10:05:31 [DEBUG] evaluate generation 1965: reward = -85.07, steps = 148\n",
      "10:05:35 [DEBUG] evaluate generation 1966: reward = -85.52, steps = 152\n",
      "10:05:36 [DEBUG] evaluate generation 1967: reward = -85.30, steps = 151\n",
      "10:05:38 [DEBUG] evaluate generation 1968: reward = -85.87, steps = 147\n",
      "10:05:40 [DEBUG] evaluate generation 1969: reward = -85.13, steps = 151\n",
      "10:05:43 [DEBUG] evaluate generation 1970: reward = -85.98, steps = 145\n",
      "10:05:46 [DEBUG] evaluate generation 1971: reward = -85.30, steps = 146\n",
      "10:05:48 [DEBUG] evaluate generation 1972: reward = -85.22, steps = 151\n",
      "10:05:49 [DEBUG] evaluate generation 1973: reward = -86.30, steps = 147\n",
      "10:05:51 [DEBUG] evaluate generation 1974: reward = -85.70, steps = 145\n",
      "10:05:53 [DEBUG] evaluate generation 1975: reward = -87.32, steps = 147\n",
      "10:05:54 [DEBUG] evaluate generation 1976: reward = -85.65, steps = 147\n",
      "10:05:56 [DEBUG] evaluate generation 1977: reward = -85.93, steps = 140\n",
      "10:06:00 [DEBUG] evaluate generation 1978: reward = -85.77, steps = 143\n",
      "10:06:02 [DEBUG] evaluate generation 1979: reward = -85.46, steps = 144\n",
      "10:06:03 [DEBUG] evaluate generation 1980: reward = -85.07, steps = 146\n",
      "10:06:07 [DEBUG] evaluate generation 1981: reward = -85.85, steps = 152\n",
      "10:06:09 [DEBUG] evaluate generation 1982: reward = -86.33, steps = 153\n",
      "10:06:11 [DEBUG] evaluate generation 1983: reward = -86.12, steps = 149\n",
      "10:06:13 [DEBUG] evaluate generation 1984: reward = -87.03, steps = 150\n",
      "10:06:15 [DEBUG] evaluate generation 1985: reward = -85.44, steps = 153\n",
      "10:06:17 [DEBUG] evaluate generation 1986: reward = -86.24, steps = 150\n",
      "10:06:19 [DEBUG] evaluate generation 1987: reward = -85.35, steps = 147\n",
      "10:06:21 [DEBUG] evaluate generation 1988: reward = -87.22, steps = 149\n",
      "10:06:22 [DEBUG] evaluate generation 1989: reward = -86.09, steps = 145\n",
      "10:06:25 [DEBUG] evaluate generation 1990: reward = -86.16, steps = 144\n",
      "10:06:29 [DEBUG] evaluate generation 1991: reward = -85.59, steps = 149\n",
      "10:06:31 [DEBUG] evaluate generation 1992: reward = -85.76, steps = 155\n",
      "10:06:33 [DEBUG] evaluate generation 1993: reward = -87.02, steps = 147\n",
      "10:06:34 [DEBUG] evaluate generation 1994: reward = -85.46, steps = 151\n",
      "10:06:36 [DEBUG] evaluate generation 1995: reward = -85.78, steps = 150\n",
      "10:06:39 [DEBUG] evaluate generation 1996: reward = -85.45, steps = 148\n",
      "10:06:40 [DEBUG] evaluate generation 1997: reward = -85.56, steps = 153\n",
      "10:06:42 [DEBUG] evaluate generation 1998: reward = -85.75, steps = 150\n",
      "10:06:43 [DEBUG] evaluate generation 1999: reward = -86.69, steps = 158\n",
      "10:06:46 [DEBUG] evaluate generation 2000: reward = -85.72, steps = 153\n",
      "10:06:48 [DEBUG] evaluate generation 2001: reward = -84.74, steps = 155\n",
      "10:06:49 [DEBUG] evaluate generation 2002: reward = -85.44, steps = 154\n",
      "10:06:52 [DEBUG] evaluate generation 2003: reward = -85.76, steps = 155\n",
      "10:06:54 [DEBUG] evaluate generation 2004: reward = -86.32, steps = 152\n",
      "10:06:55 [DEBUG] evaluate generation 2005: reward = -85.46, steps = 155\n",
      "10:06:57 [DEBUG] evaluate generation 2006: reward = -86.02, steps = 155\n",
      "10:06:58 [DEBUG] evaluate generation 2007: reward = -85.69, steps = 155\n",
      "10:07:01 [DEBUG] evaluate generation 2008: reward = -85.93, steps = 153\n",
      "10:07:02 [DEBUG] evaluate generation 2009: reward = -85.46, steps = 162\n",
      "10:07:06 [DEBUG] evaluate generation 2010: reward = -86.05, steps = 146\n",
      "10:07:08 [DEBUG] evaluate generation 2011: reward = -85.74, steps = 149\n",
      "10:07:09 [DEBUG] evaluate generation 2012: reward = -85.89, steps = 152\n",
      "10:07:11 [DEBUG] evaluate generation 2013: reward = -85.50, steps = 149\n",
      "10:07:13 [DEBUG] evaluate generation 2014: reward = -87.19, steps = 151\n",
      "10:07:16 [DEBUG] evaluate generation 2015: reward = -85.73, steps = 155\n",
      "10:07:18 [DEBUG] evaluate generation 2016: reward = -85.82, steps = 150\n",
      "10:07:21 [DEBUG] evaluate generation 2017: reward = -85.80, steps = 149\n",
      "10:07:23 [DEBUG] evaluate generation 2018: reward = -85.42, steps = 155\n",
      "10:07:26 [DEBUG] evaluate generation 2019: reward = -85.60, steps = 151\n",
      "10:07:28 [DEBUG] evaluate generation 2020: reward = -85.92, steps = 147\n",
      "10:07:30 [DEBUG] evaluate generation 2021: reward = -85.89, steps = 151\n",
      "10:07:32 [DEBUG] evaluate generation 2022: reward = -85.61, steps = 150\n",
      "10:07:34 [DEBUG] evaluate generation 2023: reward = -86.09, steps = 149\n",
      "10:07:35 [DEBUG] evaluate generation 2024: reward = -87.36, steps = 144\n",
      "10:07:37 [DEBUG] evaluate generation 2025: reward = -85.65, steps = 151\n",
      "10:07:40 [DEBUG] evaluate generation 2026: reward = -85.63, steps = 153\n",
      "10:07:41 [DEBUG] evaluate generation 2027: reward = -85.52, steps = 157\n",
      "10:07:43 [DEBUG] evaluate generation 2028: reward = -85.87, steps = 151\n",
      "10:07:44 [DEBUG] evaluate generation 2029: reward = -85.64, steps = 155\n",
      "10:07:46 [DEBUG] evaluate generation 2030: reward = -85.34, steps = 153\n",
      "10:07:48 [DEBUG] evaluate generation 2031: reward = -86.66, steps = 150\n",
      "10:07:50 [DEBUG] evaluate generation 2032: reward = -85.93, steps = 148\n",
      "10:07:52 [DEBUG] evaluate generation 2033: reward = -86.24, steps = 151\n",
      "10:07:54 [DEBUG] evaluate generation 2034: reward = -86.08, steps = 152\n",
      "10:07:57 [DEBUG] evaluate generation 2035: reward = -86.11, steps = 146\n",
      "10:08:00 [DEBUG] evaluate generation 2036: reward = -86.29, steps = 151\n",
      "10:08:02 [DEBUG] evaluate generation 2037: reward = -84.66, steps = 151\n",
      "10:08:03 [DEBUG] evaluate generation 2038: reward = -87.23, steps = 152\n",
      "10:08:05 [DEBUG] evaluate generation 2039: reward = -85.99, steps = 150\n",
      "10:08:06 [DEBUG] evaluate generation 2040: reward = -85.75, steps = 147\n",
      "10:08:09 [DEBUG] evaluate generation 2041: reward = -85.56, steps = 155\n",
      "10:08:10 [DEBUG] evaluate generation 2042: reward = -85.01, steps = 152\n",
      "10:08:12 [DEBUG] evaluate generation 2043: reward = -85.62, steps = 152\n",
      "10:08:15 [DEBUG] evaluate generation 2044: reward = -84.76, steps = 152\n",
      "10:08:16 [DEBUG] evaluate generation 2045: reward = -85.09, steps = 151\n",
      "10:08:18 [DEBUG] evaluate generation 2046: reward = -86.06, steps = 155\n",
      "10:08:19 [DEBUG] evaluate generation 2047: reward = -85.27, steps = 152\n",
      "10:08:21 [DEBUG] evaluate generation 2048: reward = -86.27, steps = 152\n",
      "10:08:24 [DEBUG] evaluate generation 2049: reward = -85.33, steps = 158\n",
      "10:08:27 [DEBUG] evaluate generation 2050: reward = -86.08, steps = 150\n",
      "10:08:29 [DEBUG] evaluate generation 2051: reward = -86.11, steps = 157\n",
      "10:08:32 [DEBUG] evaluate generation 2052: reward = -85.73, steps = 153\n",
      "10:08:36 [DEBUG] evaluate generation 2053: reward = -85.43, steps = 153\n",
      "10:08:37 [DEBUG] evaluate generation 2054: reward = -85.78, steps = 149\n",
      "10:08:38 [DEBUG] evaluate generation 2055: reward = -86.76, steps = 151\n",
      "10:08:40 [DEBUG] evaluate generation 2056: reward = -86.63, steps = 154\n",
      "10:08:41 [DEBUG] evaluate generation 2057: reward = -85.67, steps = 159\n",
      "10:08:44 [DEBUG] evaluate generation 2058: reward = -85.30, steps = 153\n",
      "10:08:47 [DEBUG] evaluate generation 2059: reward = -85.92, steps = 149\n",
      "10:08:50 [DEBUG] evaluate generation 2060: reward = -85.45, steps = 145\n",
      "10:08:52 [DEBUG] evaluate generation 2061: reward = -85.82, steps = 152\n",
      "10:08:53 [DEBUG] evaluate generation 2062: reward = -85.46, steps = 146\n",
      "10:08:56 [DEBUG] evaluate generation 2063: reward = -85.59, steps = 157\n",
      "10:08:58 [DEBUG] evaluate generation 2064: reward = -86.13, steps = 148\n",
      "10:08:59 [DEBUG] evaluate generation 2065: reward = -85.34, steps = 156\n",
      "10:09:03 [DEBUG] evaluate generation 2066: reward = -85.57, steps = 157\n",
      "10:09:04 [DEBUG] evaluate generation 2067: reward = -85.42, steps = 158\n",
      "10:09:06 [DEBUG] evaluate generation 2068: reward = -85.21, steps = 156\n",
      "10:09:08 [DEBUG] evaluate generation 2069: reward = -87.20, steps = 146\n",
      "10:09:09 [DEBUG] evaluate generation 2070: reward = -86.47, steps = 152\n",
      "10:09:12 [DEBUG] evaluate generation 2071: reward = -85.95, steps = 146\n",
      "10:09:14 [DEBUG] evaluate generation 2072: reward = -85.56, steps = 156\n",
      "10:09:16 [DEBUG] evaluate generation 2073: reward = -85.52, steps = 145\n",
      "10:09:19 [DEBUG] evaluate generation 2074: reward = -86.07, steps = 151\n",
      "10:09:20 [DEBUG] evaluate generation 2075: reward = -86.24, steps = 148\n",
      "10:09:22 [DEBUG] evaluate generation 2076: reward = -86.11, steps = 153\n",
      "10:09:23 [DEBUG] evaluate generation 2077: reward = -85.80, steps = 152\n",
      "10:09:25 [DEBUG] evaluate generation 2078: reward = -85.32, steps = 154\n",
      "10:09:26 [DEBUG] evaluate generation 2079: reward = -85.69, steps = 167\n",
      "10:09:28 [DEBUG] evaluate generation 2080: reward = -87.40, steps = 147\n",
      "10:09:29 [DEBUG] evaluate generation 2081: reward = -85.62, steps = 148\n",
      "10:09:30 [DEBUG] evaluate generation 2082: reward = -87.32, steps = 149\n",
      "10:09:32 [DEBUG] evaluate generation 2083: reward = -85.08, steps = 146\n",
      "10:09:33 [DEBUG] evaluate generation 2084: reward = -85.91, steps = 144\n",
      "10:09:35 [DEBUG] evaluate generation 2085: reward = -85.52, steps = 148\n",
      "10:09:37 [DEBUG] evaluate generation 2086: reward = -85.44, steps = 155\n",
      "10:09:40 [DEBUG] evaluate generation 2087: reward = -85.33, steps = 152\n",
      "10:09:41 [DEBUG] evaluate generation 2088: reward = -85.43, steps = 148\n",
      "10:09:43 [DEBUG] evaluate generation 2089: reward = -85.66, steps = 149\n",
      "10:09:44 [DEBUG] evaluate generation 2090: reward = -85.02, steps = 149\n",
      "10:09:46 [DEBUG] evaluate generation 2091: reward = -85.08, steps = 150\n",
      "10:09:48 [DEBUG] evaluate generation 2092: reward = -85.01, steps = 160\n",
      "10:09:49 [DEBUG] evaluate generation 2093: reward = -84.98, steps = 149\n",
      "10:09:51 [DEBUG] evaluate generation 2094: reward = -85.38, steps = 150\n",
      "10:09:52 [DEBUG] evaluate generation 2095: reward = -86.07, steps = 153\n",
      "10:09:55 [DEBUG] evaluate generation 2096: reward = -85.44, steps = 150\n",
      "10:09:57 [DEBUG] evaluate generation 2097: reward = -85.27, steps = 150\n",
      "10:09:59 [DEBUG] evaluate generation 2098: reward = -85.25, steps = 154\n",
      "10:10:01 [DEBUG] evaluate generation 2099: reward = -84.87, steps = 150\n",
      "10:10:03 [DEBUG] evaluate generation 2100: reward = -85.58, steps = 146\n",
      "10:10:06 [DEBUG] evaluate generation 2101: reward = -86.02, steps = 146\n",
      "10:10:08 [DEBUG] evaluate generation 2102: reward = -84.99, steps = 148\n",
      "10:10:10 [DEBUG] evaluate generation 2103: reward = -87.63, steps = 144\n",
      "10:10:11 [DEBUG] evaluate generation 2104: reward = -85.57, steps = 149\n",
      "10:10:14 [DEBUG] evaluate generation 2105: reward = -84.84, steps = 146\n",
      "10:10:16 [DEBUG] evaluate generation 2106: reward = -87.13, steps = 149\n",
      "10:10:18 [DEBUG] evaluate generation 2107: reward = -86.62, steps = 145\n",
      "10:10:19 [DEBUG] evaluate generation 2108: reward = -86.91, steps = 143\n",
      "10:10:21 [DEBUG] evaluate generation 2109: reward = -84.86, steps = 148\n",
      "10:10:25 [DEBUG] evaluate generation 2110: reward = -85.04, steps = 146\n",
      "10:10:26 [DEBUG] evaluate generation 2111: reward = -85.33, steps = 148\n",
      "10:10:28 [DEBUG] evaluate generation 2112: reward = -85.88, steps = 149\n",
      "10:10:29 [DEBUG] evaluate generation 2113: reward = -85.05, steps = 152\n",
      "10:10:31 [DEBUG] evaluate generation 2114: reward = -84.90, steps = 147\n",
      "10:10:35 [DEBUG] evaluate generation 2115: reward = -85.80, steps = 148\n",
      "10:10:36 [DEBUG] evaluate generation 2116: reward = -85.46, steps = 141\n",
      "10:10:38 [DEBUG] evaluate generation 2117: reward = -87.82, steps = 146\n",
      "10:10:40 [DEBUG] evaluate generation 2118: reward = -86.35, steps = 150\n",
      "10:10:41 [DEBUG] evaluate generation 2119: reward = -87.21, steps = 154\n",
      "10:10:43 [DEBUG] evaluate generation 2120: reward = -85.34, steps = 145\n",
      "10:10:47 [DEBUG] evaluate generation 2121: reward = -85.51, steps = 150\n",
      "10:10:49 [DEBUG] evaluate generation 2122: reward = -85.56, steps = 144\n",
      "10:10:50 [DEBUG] evaluate generation 2123: reward = -85.03, steps = 148\n",
      "10:10:53 [DEBUG] evaluate generation 2124: reward = -87.00, steps = 151\n",
      "10:10:56 [DEBUG] evaluate generation 2125: reward = -85.28, steps = 146\n",
      "10:10:58 [DEBUG] evaluate generation 2126: reward = -84.96, steps = 152\n",
      "10:10:59 [DEBUG] evaluate generation 2127: reward = -85.11, steps = 150\n",
      "10:11:00 [DEBUG] evaluate generation 2128: reward = -85.00, steps = 153\n",
      "10:11:03 [DEBUG] evaluate generation 2129: reward = -85.03, steps = 149\n",
      "10:11:04 [DEBUG] evaluate generation 2130: reward = -87.45, steps = 151\n",
      "10:11:06 [DEBUG] evaluate generation 2131: reward = -84.96, steps = 147\n",
      "10:11:08 [DEBUG] evaluate generation 2132: reward = -87.44, steps = 154\n",
      "10:11:11 [DEBUG] evaluate generation 2133: reward = -84.88, steps = 154\n",
      "10:11:13 [DEBUG] evaluate generation 2134: reward = -85.07, steps = 151\n",
      "10:11:14 [DEBUG] evaluate generation 2135: reward = -84.67, steps = 153\n",
      "10:11:16 [DEBUG] evaluate generation 2136: reward = -85.56, steps = 157\n",
      "10:11:18 [DEBUG] evaluate generation 2137: reward = -84.83, steps = 151\n",
      "10:11:21 [DEBUG] evaluate generation 2138: reward = -85.22, steps = 155\n",
      "10:11:23 [DEBUG] evaluate generation 2139: reward = -85.35, steps = 150\n",
      "10:11:24 [DEBUG] evaluate generation 2140: reward = -85.59, steps = 151\n",
      "10:11:26 [DEBUG] evaluate generation 2141: reward = -85.32, steps = 146\n",
      "10:11:30 [DEBUG] evaluate generation 2142: reward = -85.76, steps = 147\n",
      "10:11:33 [DEBUG] evaluate generation 2143: reward = -84.86, steps = 153\n",
      "10:11:35 [DEBUG] evaluate generation 2144: reward = -85.74, steps = 147\n",
      "10:11:36 [DEBUG] evaluate generation 2145: reward = -86.11, steps = 147\n",
      "10:11:39 [DEBUG] evaluate generation 2146: reward = -85.25, steps = 148\n",
      "10:11:40 [DEBUG] evaluate generation 2147: reward = -84.89, steps = 148\n",
      "10:11:43 [DEBUG] evaluate generation 2148: reward = -86.80, steps = 148\n",
      "10:11:44 [DEBUG] evaluate generation 2149: reward = -85.80, steps = 148\n",
      "10:11:46 [DEBUG] evaluate generation 2150: reward = -84.70, steps = 154\n",
      "10:11:47 [DEBUG] evaluate generation 2151: reward = -85.50, steps = 145\n",
      "10:11:49 [DEBUG] evaluate generation 2152: reward = -85.90, steps = 175\n",
      "10:11:50 [DEBUG] evaluate generation 2153: reward = -86.74, steps = 148\n",
      "10:11:53 [DEBUG] evaluate generation 2154: reward = -84.87, steps = 148\n",
      "10:11:54 [DEBUG] evaluate generation 2155: reward = -85.82, steps = 149\n",
      "10:11:57 [DEBUG] evaluate generation 2156: reward = -85.28, steps = 149\n",
      "10:11:58 [DEBUG] evaluate generation 2157: reward = -85.73, steps = 147\n",
      "10:12:00 [DEBUG] evaluate generation 2158: reward = -85.59, steps = 155\n",
      "10:12:02 [DEBUG] evaluate generation 2159: reward = -85.71, steps = 151\n",
      "10:12:05 [DEBUG] evaluate generation 2160: reward = -85.31, steps = 147\n",
      "10:12:06 [DEBUG] evaluate generation 2161: reward = -85.49, steps = 159\n",
      "10:12:08 [DEBUG] evaluate generation 2162: reward = -86.18, steps = 149\n",
      "10:12:10 [DEBUG] evaluate generation 2163: reward = -85.47, steps = 143\n",
      "10:12:13 [DEBUG] evaluate generation 2164: reward = -87.35, steps = 149\n",
      "10:12:14 [DEBUG] evaluate generation 2165: reward = -85.11, steps = 146\n",
      "10:12:17 [DEBUG] evaluate generation 2166: reward = -85.30, steps = 146\n",
      "10:12:18 [DEBUG] evaluate generation 2167: reward = -85.30, steps = 153\n",
      "10:12:21 [DEBUG] evaluate generation 2168: reward = -84.94, steps = 152\n",
      "10:12:23 [DEBUG] evaluate generation 2169: reward = -85.85, steps = 149\n",
      "10:12:25 [DEBUG] evaluate generation 2170: reward = -87.05, steps = 150\n",
      "10:12:27 [DEBUG] evaluate generation 2171: reward = -85.24, steps = 152\n",
      "10:12:28 [DEBUG] evaluate generation 2172: reward = -85.56, steps = 153\n",
      "10:12:32 [DEBUG] evaluate generation 2173: reward = -85.30, steps = 150\n",
      "10:12:34 [DEBUG] evaluate generation 2174: reward = -85.15, steps = 149\n",
      "10:12:36 [DEBUG] evaluate generation 2175: reward = -85.51, steps = 143\n",
      "10:12:38 [DEBUG] evaluate generation 2176: reward = -85.21, steps = 151\n",
      "10:12:40 [DEBUG] evaluate generation 2177: reward = -85.73, steps = 146\n",
      "10:12:41 [DEBUG] evaluate generation 2178: reward = -85.87, steps = 154\n",
      "10:12:43 [DEBUG] evaluate generation 2179: reward = -85.28, steps = 145\n",
      "10:12:44 [DEBUG] evaluate generation 2180: reward = -85.33, steps = 154\n",
      "10:12:47 [DEBUG] evaluate generation 2181: reward = -85.51, steps = 156\n",
      "10:12:49 [DEBUG] evaluate generation 2182: reward = -86.15, steps = 145\n",
      "10:12:51 [DEBUG] evaluate generation 2183: reward = -85.47, steps = 148\n",
      "10:12:53 [DEBUG] evaluate generation 2184: reward = -85.78, steps = 156\n",
      "10:12:54 [DEBUG] evaluate generation 2185: reward = -85.12, steps = 153\n",
      "10:12:56 [DEBUG] evaluate generation 2186: reward = -84.98, steps = 152\n",
      "10:12:59 [DEBUG] evaluate generation 2187: reward = -86.72, steps = 149\n",
      "10:13:01 [DEBUG] evaluate generation 2188: reward = -87.35, steps = 147\n",
      "10:13:02 [DEBUG] evaluate generation 2189: reward = -85.31, steps = 153\n",
      "10:13:04 [DEBUG] evaluate generation 2190: reward = -85.33, steps = 153\n",
      "10:13:06 [DEBUG] evaluate generation 2191: reward = -85.21, steps = 151\n",
      "10:13:10 [DEBUG] evaluate generation 2192: reward = -85.23, steps = 142\n",
      "10:13:11 [DEBUG] evaluate generation 2193: reward = -86.09, steps = 154\n",
      "10:13:13 [DEBUG] evaluate generation 2194: reward = -84.79, steps = 145\n",
      "10:13:15 [DEBUG] evaluate generation 2195: reward = -85.48, steps = 156\n",
      "10:13:18 [DEBUG] evaluate generation 2196: reward = -84.71, steps = 155\n",
      "10:13:20 [DEBUG] evaluate generation 2197: reward = -84.87, steps = 150\n",
      "10:13:23 [DEBUG] evaluate generation 2198: reward = -85.45, steps = 149\n",
      "10:13:24 [DEBUG] evaluate generation 2199: reward = -85.21, steps = 155\n",
      "10:13:26 [DEBUG] evaluate generation 2200: reward = -85.00, steps = 150\n",
      "10:13:28 [DEBUG] evaluate generation 2201: reward = -85.52, steps = 144\n",
      "10:13:29 [DEBUG] evaluate generation 2202: reward = -86.11, steps = 151\n",
      "10:13:31 [DEBUG] evaluate generation 2203: reward = -84.98, steps = 150\n",
      "10:13:35 [DEBUG] evaluate generation 2204: reward = -85.93, steps = 149\n",
      "10:13:37 [DEBUG] evaluate generation 2205: reward = -85.43, steps = 149\n",
      "10:13:39 [DEBUG] evaluate generation 2206: reward = -87.15, steps = 153\n",
      "10:13:41 [DEBUG] evaluate generation 2207: reward = -85.46, steps = 152\n",
      "10:13:43 [DEBUG] evaluate generation 2208: reward = -85.49, steps = 147\n",
      "10:13:45 [DEBUG] evaluate generation 2209: reward = -85.19, steps = 150\n",
      "10:13:46 [DEBUG] evaluate generation 2210: reward = -85.02, steps = 153\n",
      "10:13:48 [DEBUG] evaluate generation 2211: reward = -85.23, steps = 157\n",
      "10:13:50 [DEBUG] evaluate generation 2212: reward = -85.11, steps = 150\n",
      "10:13:52 [DEBUG] evaluate generation 2213: reward = -85.47, steps = 149\n",
      "10:13:54 [DEBUG] evaluate generation 2214: reward = -85.73, steps = 146\n",
      "10:13:56 [DEBUG] evaluate generation 2215: reward = -85.13, steps = 150\n",
      "10:13:57 [DEBUG] evaluate generation 2216: reward = -85.55, steps = 140\n",
      "10:13:59 [DEBUG] evaluate generation 2217: reward = -85.25, steps = 146\n",
      "10:14:01 [DEBUG] evaluate generation 2218: reward = -85.23, steps = 153\n",
      "10:14:03 [DEBUG] evaluate generation 2219: reward = -85.42, steps = 160\n",
      "10:14:04 [DEBUG] evaluate generation 2220: reward = -85.45, steps = 136\n",
      "10:14:06 [DEBUG] evaluate generation 2221: reward = -84.72, steps = 154\n",
      "10:14:08 [DEBUG] evaluate generation 2222: reward = -86.32, steps = 152\n",
      "10:14:10 [DEBUG] evaluate generation 2223: reward = -84.77, steps = 148\n",
      "10:14:14 [DEBUG] evaluate generation 2224: reward = -85.16, steps = 150\n",
      "10:14:16 [DEBUG] evaluate generation 2225: reward = -84.74, steps = 150\n",
      "10:14:18 [DEBUG] evaluate generation 2226: reward = -85.32, steps = 147\n",
      "10:14:19 [DEBUG] evaluate generation 2227: reward = -85.19, steps = 154\n",
      "10:14:21 [DEBUG] evaluate generation 2228: reward = -85.28, steps = 142\n",
      "10:14:22 [DEBUG] evaluate generation 2229: reward = -85.36, steps = 150\n",
      "10:14:25 [DEBUG] evaluate generation 2230: reward = -84.68, steps = 148\n",
      "10:14:27 [DEBUG] evaluate generation 2231: reward = -85.03, steps = 156\n",
      "10:14:29 [DEBUG] evaluate generation 2232: reward = -85.62, steps = 146\n",
      "10:14:31 [DEBUG] evaluate generation 2233: reward = -85.41, steps = 149\n",
      "10:14:32 [DEBUG] evaluate generation 2234: reward = -84.70, steps = 159\n",
      "10:14:34 [DEBUG] evaluate generation 2235: reward = -85.07, steps = 155\n",
      "10:14:35 [DEBUG] evaluate generation 2236: reward = -85.01, steps = 148\n",
      "10:14:38 [DEBUG] evaluate generation 2237: reward = -84.99, steps = 161\n",
      "10:14:40 [DEBUG] evaluate generation 2238: reward = -84.85, steps = 154\n",
      "10:14:42 [DEBUG] evaluate generation 2239: reward = -84.89, steps = 155\n",
      "10:14:44 [DEBUG] evaluate generation 2240: reward = -84.87, steps = 151\n",
      "10:14:47 [DEBUG] evaluate generation 2241: reward = -85.33, steps = 153\n",
      "10:14:50 [DEBUG] evaluate generation 2242: reward = -84.92, steps = 156\n",
      "10:14:51 [DEBUG] evaluate generation 2243: reward = -84.68, steps = 150\n",
      "10:14:53 [DEBUG] evaluate generation 2244: reward = -85.13, steps = 156\n",
      "10:14:57 [DEBUG] evaluate generation 2245: reward = -84.09, steps = 151\n",
      "10:15:00 [DEBUG] evaluate generation 2246: reward = -84.72, steps = 153\n",
      "10:15:02 [DEBUG] evaluate generation 2247: reward = -88.24, steps = 162\n",
      "10:15:03 [DEBUG] evaluate generation 2248: reward = -84.84, steps = 144\n",
      "10:15:05 [DEBUG] evaluate generation 2249: reward = -85.24, steps = 158\n",
      "10:15:07 [DEBUG] evaluate generation 2250: reward = -84.84, steps = 159\n",
      "10:15:08 [DEBUG] evaluate generation 2251: reward = -84.61, steps = 156\n",
      "10:15:10 [DEBUG] evaluate generation 2252: reward = -85.02, steps = 152\n",
      "10:15:12 [DEBUG] evaluate generation 2253: reward = -84.93, steps = 154\n",
      "10:15:13 [DEBUG] evaluate generation 2254: reward = -85.13, steps = 149\n",
      "10:15:15 [DEBUG] evaluate generation 2255: reward = -85.24, steps = 151\n",
      "10:15:17 [DEBUG] evaluate generation 2256: reward = -85.29, steps = 150\n",
      "10:15:20 [DEBUG] evaluate generation 2257: reward = -85.51, steps = 153\n",
      "10:15:21 [DEBUG] evaluate generation 2258: reward = -84.81, steps = 150\n",
      "10:15:24 [DEBUG] evaluate generation 2259: reward = -85.33, steps = 151\n",
      "10:15:26 [DEBUG] evaluate generation 2260: reward = -85.28, steps = 152\n",
      "10:15:28 [DEBUG] evaluate generation 2261: reward = -86.63, steps = 152\n",
      "10:15:30 [DEBUG] evaluate generation 2262: reward = -85.21, steps = 153\n",
      "10:15:32 [DEBUG] evaluate generation 2263: reward = -84.82, steps = 151\n",
      "10:15:33 [DEBUG] evaluate generation 2264: reward = -85.13, steps = 153\n",
      "10:15:35 [DEBUG] evaluate generation 2265: reward = -85.13, steps = 152\n",
      "10:15:38 [DEBUG] evaluate generation 2266: reward = -85.04, steps = 149\n",
      "10:15:40 [DEBUG] evaluate generation 2267: reward = -84.81, steps = 151\n",
      "10:15:42 [DEBUG] evaluate generation 2268: reward = -85.07, steps = 150\n",
      "10:15:46 [DEBUG] evaluate generation 2269: reward = -85.86, steps = 140\n",
      "10:15:48 [DEBUG] evaluate generation 2270: reward = -85.84, steps = 149\n",
      "10:15:50 [DEBUG] evaluate generation 2271: reward = -85.23, steps = 149\n",
      "10:15:51 [DEBUG] evaluate generation 2272: reward = -85.24, steps = 151\n",
      "10:15:53 [DEBUG] evaluate generation 2273: reward = -85.04, steps = 151\n",
      "10:15:55 [DEBUG] evaluate generation 2274: reward = -85.13, steps = 153\n",
      "10:15:58 [DEBUG] evaluate generation 2275: reward = -85.82, steps = 153\n",
      "10:16:00 [DEBUG] evaluate generation 2276: reward = -85.05, steps = 141\n",
      "10:16:01 [DEBUG] evaluate generation 2277: reward = -85.30, steps = 147\n",
      "10:16:03 [DEBUG] evaluate generation 2278: reward = -84.98, steps = 160\n",
      "10:16:05 [DEBUG] evaluate generation 2279: reward = -85.08, steps = 159\n",
      "10:16:07 [DEBUG] evaluate generation 2280: reward = -85.20, steps = 150\n",
      "10:16:09 [DEBUG] evaluate generation 2281: reward = -85.06, steps = 158\n",
      "10:16:11 [DEBUG] evaluate generation 2282: reward = -85.58, steps = 152\n",
      "10:16:15 [DEBUG] evaluate generation 2283: reward = -85.12, steps = 154\n",
      "10:16:16 [DEBUG] evaluate generation 2284: reward = -85.23, steps = 155\n",
      "10:16:19 [DEBUG] evaluate generation 2285: reward = -85.02, steps = 151\n",
      "10:16:21 [DEBUG] evaluate generation 2286: reward = -84.99, steps = 144\n",
      "10:16:25 [DEBUG] evaluate generation 2287: reward = -84.80, steps = 153\n",
      "10:16:26 [DEBUG] evaluate generation 2288: reward = -85.23, steps = 157\n",
      "10:16:28 [DEBUG] evaluate generation 2289: reward = -85.00, steps = 153\n",
      "10:16:29 [DEBUG] evaluate generation 2290: reward = -85.51, steps = 152\n",
      "10:16:31 [DEBUG] evaluate generation 2291: reward = -85.13, steps = 154\n",
      "10:16:33 [DEBUG] evaluate generation 2292: reward = -85.19, steps = 151\n",
      "10:16:35 [DEBUG] evaluate generation 2293: reward = -85.49, steps = 153\n",
      "10:16:37 [DEBUG] evaluate generation 2294: reward = -86.13, steps = 152\n",
      "10:16:40 [DEBUG] evaluate generation 2295: reward = -84.95, steps = 164\n",
      "10:16:42 [DEBUG] evaluate generation 2296: reward = -85.11, steps = 152\n",
      "10:16:43 [DEBUG] evaluate generation 2297: reward = -85.14, steps = 155\n",
      "10:16:46 [DEBUG] evaluate generation 2298: reward = -84.95, steps = 153\n",
      "10:16:48 [DEBUG] evaluate generation 2299: reward = -85.44, steps = 154\n",
      "10:16:51 [DEBUG] evaluate generation 2300: reward = -85.55, steps = 154\n",
      "10:16:53 [DEBUG] evaluate generation 2301: reward = -85.15, steps = 149\n",
      "10:16:54 [DEBUG] evaluate generation 2302: reward = -85.27, steps = 148\n",
      "10:16:56 [DEBUG] evaluate generation 2303: reward = -84.95, steps = 153\n",
      "10:16:59 [DEBUG] evaluate generation 2304: reward = -84.89, steps = 158\n",
      "10:17:01 [DEBUG] evaluate generation 2305: reward = -85.14, steps = 161\n",
      "10:17:03 [DEBUG] evaluate generation 2306: reward = -85.25, steps = 151\n",
      "10:17:05 [DEBUG] evaluate generation 2307: reward = -85.08, steps = 156\n",
      "10:17:06 [DEBUG] evaluate generation 2308: reward = -85.26, steps = 155\n",
      "10:17:08 [DEBUG] evaluate generation 2309: reward = -84.64, steps = 150\n",
      "10:17:10 [DEBUG] evaluate generation 2310: reward = -84.84, steps = 160\n",
      "10:17:13 [DEBUG] evaluate generation 2311: reward = -84.69, steps = 157\n",
      "10:17:16 [DEBUG] evaluate generation 2312: reward = -85.24, steps = 151\n",
      "10:17:17 [DEBUG] evaluate generation 2313: reward = -84.77, steps = 152\n",
      "10:17:20 [DEBUG] evaluate generation 2314: reward = -85.10, steps = 147\n",
      "10:17:21 [DEBUG] evaluate generation 2315: reward = -84.99, steps = 156\n",
      "10:17:23 [DEBUG] evaluate generation 2316: reward = -85.51, steps = 160\n",
      "10:17:24 [DEBUG] evaluate generation 2317: reward = -85.42, steps = 155\n",
      "10:17:28 [DEBUG] evaluate generation 2318: reward = -85.17, steps = 155\n",
      "10:17:30 [DEBUG] evaluate generation 2319: reward = -84.76, steps = 153\n",
      "10:17:33 [DEBUG] evaluate generation 2320: reward = -84.96, steps = 152\n",
      "10:17:35 [DEBUG] evaluate generation 2321: reward = -85.05, steps = 153\n",
      "10:17:36 [DEBUG] evaluate generation 2322: reward = -85.05, steps = 153\n",
      "10:17:38 [DEBUG] evaluate generation 2323: reward = -85.08, steps = 151\n",
      "10:17:39 [DEBUG] evaluate generation 2324: reward = -85.06, steps = 151\n",
      "10:17:41 [DEBUG] evaluate generation 2325: reward = -85.56, steps = 152\n",
      "10:17:43 [DEBUG] evaluate generation 2326: reward = -84.86, steps = 160\n",
      "10:17:45 [DEBUG] evaluate generation 2327: reward = -85.03, steps = 160\n",
      "10:17:48 [DEBUG] evaluate generation 2328: reward = -85.30, steps = 160\n",
      "10:17:49 [DEBUG] evaluate generation 2329: reward = -85.03, steps = 158\n",
      "10:17:51 [DEBUG] evaluate generation 2330: reward = -85.20, steps = 161\n",
      "10:17:53 [DEBUG] evaluate generation 2331: reward = -85.03, steps = 153\n",
      "10:17:54 [DEBUG] evaluate generation 2332: reward = -85.28, steps = 159\n",
      "10:17:56 [DEBUG] evaluate generation 2333: reward = -85.11, steps = 163\n",
      "10:17:58 [DEBUG] evaluate generation 2334: reward = -84.55, steps = 163\n",
      "10:18:01 [DEBUG] evaluate generation 2335: reward = -85.68, steps = 161\n",
      "10:18:03 [DEBUG] evaluate generation 2336: reward = -84.91, steps = 156\n",
      "10:18:07 [DEBUG] evaluate generation 2337: reward = -84.91, steps = 166\n",
      "10:18:09 [DEBUG] evaluate generation 2338: reward = -85.12, steps = 156\n",
      "10:18:11 [DEBUG] evaluate generation 2339: reward = -85.09, steps = 156\n",
      "10:18:13 [DEBUG] evaluate generation 2340: reward = -84.88, steps = 155\n",
      "10:18:15 [DEBUG] evaluate generation 2341: reward = -84.93, steps = 162\n",
      "10:18:18 [DEBUG] evaluate generation 2342: reward = -85.15, steps = 160\n",
      "10:18:22 [DEBUG] evaluate generation 2343: reward = -85.49, steps = 158\n",
      "10:18:23 [DEBUG] evaluate generation 2344: reward = -84.82, steps = 163\n",
      "10:18:25 [DEBUG] evaluate generation 2345: reward = -84.82, steps = 165\n",
      "10:18:27 [DEBUG] evaluate generation 2346: reward = -85.28, steps = 161\n",
      "10:18:28 [DEBUG] evaluate generation 2347: reward = -84.90, steps = 160\n",
      "10:18:30 [DEBUG] evaluate generation 2348: reward = -84.59, steps = 158\n",
      "10:18:31 [DEBUG] evaluate generation 2349: reward = -84.46, steps = 159\n",
      "10:18:33 [DEBUG] evaluate generation 2350: reward = -85.44, steps = 157\n",
      "10:18:34 [DEBUG] evaluate generation 2351: reward = -85.22, steps = 161\n",
      "10:18:36 [DEBUG] evaluate generation 2352: reward = -85.44, steps = 173\n",
      "10:18:37 [DEBUG] evaluate generation 2353: reward = -85.54, steps = 151\n",
      "10:18:40 [DEBUG] evaluate generation 2354: reward = -84.84, steps = 156\n",
      "10:18:41 [DEBUG] evaluate generation 2355: reward = -85.27, steps = 151\n",
      "10:18:43 [DEBUG] evaluate generation 2356: reward = -85.71, steps = 157\n",
      "10:18:44 [DEBUG] evaluate generation 2357: reward = -85.20, steps = 160\n",
      "10:18:47 [DEBUG] evaluate generation 2358: reward = -85.63, steps = 161\n",
      "10:18:48 [DEBUG] evaluate generation 2359: reward = -85.68, steps = 153\n",
      "10:18:50 [DEBUG] evaluate generation 2360: reward = -84.95, steps = 150\n",
      "10:18:51 [DEBUG] evaluate generation 2361: reward = -85.25, steps = 154\n",
      "10:18:53 [DEBUG] evaluate generation 2362: reward = -85.05, steps = 156\n",
      "10:18:54 [DEBUG] evaluate generation 2363: reward = -85.13, steps = 157\n",
      "10:18:56 [DEBUG] evaluate generation 2364: reward = -85.27, steps = 152\n",
      "10:18:58 [DEBUG] evaluate generation 2365: reward = -85.13, steps = 155\n",
      "10:19:00 [DEBUG] evaluate generation 2366: reward = -84.99, steps = 162\n",
      "10:19:01 [DEBUG] evaluate generation 2367: reward = -84.98, steps = 149\n",
      "10:19:04 [DEBUG] evaluate generation 2368: reward = -85.29, steps = 148\n",
      "10:19:05 [DEBUG] evaluate generation 2369: reward = -85.31, steps = 153\n",
      "10:19:07 [DEBUG] evaluate generation 2370: reward = -85.03, steps = 150\n",
      "10:19:08 [DEBUG] evaluate generation 2371: reward = -85.06, steps = 154\n",
      "10:19:11 [DEBUG] evaluate generation 2372: reward = -85.26, steps = 156\n",
      "10:19:12 [DEBUG] evaluate generation 2373: reward = -85.25, steps = 151\n",
      "10:19:14 [DEBUG] evaluate generation 2374: reward = -85.17, steps = 148\n",
      "10:19:16 [DEBUG] evaluate generation 2375: reward = -85.26, steps = 152\n",
      "10:19:17 [DEBUG] evaluate generation 2376: reward = -87.54, steps = 152\n",
      "10:19:19 [DEBUG] evaluate generation 2377: reward = -85.45, steps = 153\n",
      "10:19:21 [DEBUG] evaluate generation 2378: reward = -85.40, steps = 154\n",
      "10:19:24 [DEBUG] evaluate generation 2379: reward = -84.93, steps = 151\n",
      "10:19:28 [DEBUG] evaluate generation 2380: reward = -84.90, steps = 155\n",
      "10:19:31 [DEBUG] evaluate generation 2381: reward = -84.93, steps = 154\n",
      "10:19:33 [DEBUG] evaluate generation 2382: reward = -85.35, steps = 150\n",
      "10:19:35 [DEBUG] evaluate generation 2383: reward = -85.34, steps = 154\n",
      "10:19:36 [DEBUG] evaluate generation 2384: reward = -85.34, steps = 151\n",
      "10:19:38 [DEBUG] evaluate generation 2385: reward = -85.62, steps = 155\n",
      "10:19:40 [DEBUG] evaluate generation 2386: reward = -85.38, steps = 149\n",
      "10:19:44 [DEBUG] evaluate generation 2387: reward = -85.85, steps = 154\n",
      "10:19:45 [DEBUG] evaluate generation 2388: reward = -85.51, steps = 150\n",
      "10:19:47 [DEBUG] evaluate generation 2389: reward = -85.34, steps = 151\n",
      "10:19:49 [DEBUG] evaluate generation 2390: reward = -85.51, steps = 149\n",
      "10:19:50 [DEBUG] evaluate generation 2391: reward = -85.14, steps = 158\n",
      "10:19:52 [DEBUG] evaluate generation 2392: reward = -85.00, steps = 158\n",
      "10:19:55 [DEBUG] evaluate generation 2393: reward = -85.08, steps = 149\n",
      "10:19:57 [DEBUG] evaluate generation 2394: reward = -84.75, steps = 153\n",
      "10:19:59 [DEBUG] evaluate generation 2395: reward = -85.49, steps = 152\n",
      "10:20:01 [DEBUG] evaluate generation 2396: reward = -85.52, steps = 158\n",
      "10:20:02 [DEBUG] evaluate generation 2397: reward = -84.84, steps = 152\n",
      "10:20:05 [DEBUG] evaluate generation 2398: reward = -84.53, steps = 156\n",
      "10:20:08 [DEBUG] evaluate generation 2399: reward = -84.90, steps = 155\n",
      "10:20:10 [DEBUG] evaluate generation 2400: reward = -84.93, steps = 156\n",
      "10:20:12 [DEBUG] evaluate generation 2401: reward = -85.10, steps = 160\n",
      "10:20:14 [DEBUG] evaluate generation 2402: reward = -84.89, steps = 160\n",
      "10:20:19 [DEBUG] evaluate generation 2403: reward = -84.86, steps = 156\n",
      "10:20:20 [DEBUG] evaluate generation 2404: reward = -85.02, steps = 154\n",
      "10:20:23 [DEBUG] evaluate generation 2405: reward = -85.22, steps = 163\n",
      "10:20:26 [DEBUG] evaluate generation 2406: reward = -84.74, steps = 158\n",
      "10:20:29 [DEBUG] evaluate generation 2407: reward = -84.87, steps = 158\n",
      "10:20:30 [DEBUG] evaluate generation 2408: reward = -85.74, steps = 175\n",
      "10:20:32 [DEBUG] evaluate generation 2409: reward = -84.75, steps = 157\n",
      "10:20:35 [DEBUG] evaluate generation 2410: reward = -84.84, steps = 163\n",
      "10:20:38 [DEBUG] evaluate generation 2411: reward = -84.76, steps = 158\n",
      "10:20:40 [DEBUG] evaluate generation 2412: reward = -85.17, steps = 154\n",
      "10:20:42 [DEBUG] evaluate generation 2413: reward = -85.31, steps = 154\n",
      "10:20:44 [DEBUG] evaluate generation 2414: reward = -85.46, steps = 155\n",
      "10:20:45 [DEBUG] evaluate generation 2415: reward = -87.18, steps = 158\n",
      "10:20:47 [DEBUG] evaluate generation 2416: reward = -85.23, steps = 156\n",
      "10:20:49 [DEBUG] evaluate generation 2417: reward = -85.48, steps = 152\n",
      "10:20:52 [DEBUG] evaluate generation 2418: reward = -85.25, steps = 154\n",
      "10:20:55 [DEBUG] evaluate generation 2419: reward = -86.09, steps = 156\n",
      "10:20:57 [DEBUG] evaluate generation 2420: reward = -85.21, steps = 153\n",
      "10:21:00 [DEBUG] evaluate generation 2421: reward = -85.26, steps = 155\n",
      "10:21:02 [DEBUG] evaluate generation 2422: reward = -85.57, steps = 163\n",
      "10:21:05 [DEBUG] evaluate generation 2423: reward = -87.37, steps = 146\n",
      "10:21:06 [DEBUG] evaluate generation 2424: reward = -85.29, steps = 161\n",
      "10:21:08 [DEBUG] evaluate generation 2425: reward = -86.05, steps = 156\n",
      "10:21:09 [DEBUG] evaluate generation 2426: reward = -85.21, steps = 154\n",
      "10:21:11 [DEBUG] evaluate generation 2427: reward = -84.95, steps = 157\n",
      "10:21:13 [DEBUG] evaluate generation 2428: reward = -85.13, steps = 147\n",
      "10:21:15 [DEBUG] evaluate generation 2429: reward = -85.52, steps = 157\n",
      "10:21:17 [DEBUG] evaluate generation 2430: reward = -84.94, steps = 159\n",
      "10:21:19 [DEBUG] evaluate generation 2431: reward = -85.41, steps = 161\n",
      "10:21:21 [DEBUG] evaluate generation 2432: reward = -85.43, steps = 153\n",
      "10:21:23 [DEBUG] evaluate generation 2433: reward = -85.45, steps = 151\n",
      "10:21:25 [DEBUG] evaluate generation 2434: reward = -85.22, steps = 157\n",
      "10:21:27 [DEBUG] evaluate generation 2435: reward = -85.44, steps = 157\n",
      "10:21:29 [DEBUG] evaluate generation 2436: reward = -85.17, steps = 157\n",
      "10:21:30 [DEBUG] evaluate generation 2437: reward = -85.09, steps = 160\n",
      "10:21:32 [DEBUG] evaluate generation 2438: reward = -84.82, steps = 162\n",
      "10:21:33 [DEBUG] evaluate generation 2439: reward = -85.19, steps = 161\n",
      "10:21:35 [DEBUG] evaluate generation 2440: reward = -85.36, steps = 160\n",
      "10:21:38 [DEBUG] evaluate generation 2441: reward = -85.08, steps = 160\n",
      "10:21:40 [DEBUG] evaluate generation 2442: reward = -85.03, steps = 160\n",
      "10:21:42 [DEBUG] evaluate generation 2443: reward = -85.42, steps = 153\n",
      "10:21:43 [DEBUG] evaluate generation 2444: reward = -84.77, steps = 154\n",
      "10:21:45 [DEBUG] evaluate generation 2445: reward = -84.49, steps = 160\n",
      "10:21:46 [DEBUG] evaluate generation 2446: reward = -84.80, steps = 163\n",
      "10:21:48 [DEBUG] evaluate generation 2447: reward = -84.84, steps = 151\n",
      "10:21:50 [DEBUG] evaluate generation 2448: reward = -85.14, steps = 160\n",
      "10:21:52 [DEBUG] evaluate generation 2449: reward = -84.98, steps = 156\n",
      "10:21:56 [DEBUG] evaluate generation 2450: reward = -84.50, steps = 160\n",
      "10:21:59 [DEBUG] evaluate generation 2451: reward = -85.02, steps = 156\n",
      "10:22:01 [DEBUG] evaluate generation 2452: reward = -84.86, steps = 161\n",
      "10:22:02 [DEBUG] evaluate generation 2453: reward = -84.92, steps = 157\n",
      "10:22:05 [DEBUG] evaluate generation 2454: reward = -85.45, steps = 160\n",
      "10:22:06 [DEBUG] evaluate generation 2455: reward = -84.93, steps = 155\n",
      "10:22:10 [DEBUG] evaluate generation 2456: reward = -84.87, steps = 150\n",
      "10:22:14 [DEBUG] evaluate generation 2457: reward = -84.88, steps = 158\n",
      "10:22:16 [DEBUG] evaluate generation 2458: reward = -85.03, steps = 153\n",
      "10:22:18 [DEBUG] evaluate generation 2459: reward = -84.46, steps = 152\n",
      "10:22:19 [DEBUG] evaluate generation 2460: reward = -85.02, steps = 154\n",
      "10:22:21 [DEBUG] evaluate generation 2461: reward = -85.02, steps = 149\n",
      "10:22:23 [DEBUG] evaluate generation 2462: reward = -84.95, steps = 150\n",
      "10:22:25 [DEBUG] evaluate generation 2463: reward = -84.61, steps = 155\n",
      "10:22:28 [DEBUG] evaluate generation 2464: reward = -84.83, steps = 152\n",
      "10:22:31 [DEBUG] evaluate generation 2465: reward = -85.28, steps = 148\n",
      "10:22:32 [DEBUG] evaluate generation 2466: reward = -85.55, steps = 150\n",
      "10:22:35 [DEBUG] evaluate generation 2467: reward = -85.26, steps = 153\n",
      "10:22:36 [DEBUG] evaluate generation 2468: reward = -84.99, steps = 154\n",
      "10:22:38 [DEBUG] evaluate generation 2469: reward = -84.86, steps = 156\n",
      "10:22:40 [DEBUG] evaluate generation 2470: reward = -84.81, steps = 156\n",
      "10:22:43 [DEBUG] evaluate generation 2471: reward = -85.01, steps = 153\n",
      "10:22:47 [DEBUG] evaluate generation 2472: reward = -85.08, steps = 154\n",
      "10:22:49 [DEBUG] evaluate generation 2473: reward = -85.43, steps = 153\n",
      "10:22:52 [DEBUG] evaluate generation 2474: reward = -85.22, steps = 151\n",
      "10:22:54 [DEBUG] evaluate generation 2475: reward = -85.35, steps = 152\n",
      "10:22:56 [DEBUG] evaluate generation 2476: reward = -85.22, steps = 154\n",
      "10:22:58 [DEBUG] evaluate generation 2477: reward = -85.57, steps = 157\n",
      "10:23:01 [DEBUG] evaluate generation 2478: reward = -85.20, steps = 150\n",
      "10:23:03 [DEBUG] evaluate generation 2479: reward = -85.84, steps = 159\n",
      "10:23:05 [DEBUG] evaluate generation 2480: reward = -85.27, steps = 150\n",
      "10:23:07 [DEBUG] evaluate generation 2481: reward = -85.12, steps = 154\n",
      "10:23:12 [DEBUG] evaluate generation 2482: reward = -85.19, steps = 151\n",
      "10:23:15 [DEBUG] evaluate generation 2483: reward = -84.99, steps = 150\n",
      "10:23:16 [DEBUG] evaluate generation 2484: reward = -86.21, steps = 151\n",
      "10:23:19 [DEBUG] evaluate generation 2485: reward = -85.42, steps = 153\n",
      "10:23:21 [DEBUG] evaluate generation 2486: reward = -85.46, steps = 148\n",
      "10:23:24 [DEBUG] evaluate generation 2487: reward = -85.95, steps = 151\n",
      "10:23:25 [DEBUG] evaluate generation 2488: reward = -86.17, steps = 159\n",
      "10:23:27 [DEBUG] evaluate generation 2489: reward = -85.30, steps = 152\n",
      "10:23:29 [DEBUG] evaluate generation 2490: reward = -85.22, steps = 153\n",
      "10:23:30 [DEBUG] evaluate generation 2491: reward = -85.28, steps = 156\n",
      "10:23:32 [DEBUG] evaluate generation 2492: reward = -85.17, steps = 154\n",
      "10:23:34 [DEBUG] evaluate generation 2493: reward = -86.22, steps = 155\n",
      "10:23:36 [DEBUG] evaluate generation 2494: reward = -84.97, steps = 149\n",
      "10:23:39 [DEBUG] evaluate generation 2495: reward = -84.85, steps = 158\n",
      "10:23:40 [DEBUG] evaluate generation 2496: reward = -85.47, steps = 165\n",
      "10:23:43 [DEBUG] evaluate generation 2497: reward = -85.40, steps = 159\n",
      "10:23:44 [DEBUG] evaluate generation 2498: reward = -85.31, steps = 153\n",
      "10:23:49 [DEBUG] evaluate generation 2499: reward = -84.85, steps = 166\n",
      "10:23:50 [DEBUG] evaluate generation 2500: reward = -85.36, steps = 155\n",
      "10:23:52 [DEBUG] evaluate generation 2501: reward = -85.02, steps = 160\n",
      "10:23:53 [DEBUG] evaluate generation 2502: reward = -84.58, steps = 153\n",
      "10:23:55 [DEBUG] evaluate generation 2503: reward = -84.78, steps = 153\n",
      "10:23:56 [DEBUG] evaluate generation 2504: reward = -85.19, steps = 157\n",
      "10:23:58 [DEBUG] evaluate generation 2505: reward = -84.99, steps = 154\n",
      "10:24:00 [DEBUG] evaluate generation 2506: reward = -84.98, steps = 158\n",
      "10:24:03 [DEBUG] evaluate generation 2507: reward = -85.29, steps = 152\n",
      "10:24:04 [DEBUG] evaluate generation 2508: reward = -84.95, steps = 155\n",
      "10:24:06 [DEBUG] evaluate generation 2509: reward = -85.01, steps = 155\n",
      "10:24:11 [DEBUG] evaluate generation 2510: reward = -85.10, steps = 159\n",
      "10:24:13 [DEBUG] evaluate generation 2511: reward = -85.28, steps = 155\n",
      "10:24:15 [DEBUG] evaluate generation 2512: reward = -84.96, steps = 157\n",
      "10:24:17 [DEBUG] evaluate generation 2513: reward = -84.96, steps = 156\n",
      "10:24:20 [DEBUG] evaluate generation 2514: reward = -85.04, steps = 164\n",
      "10:24:22 [DEBUG] evaluate generation 2515: reward = -85.24, steps = 156\n",
      "10:24:24 [DEBUG] evaluate generation 2516: reward = -85.18, steps = 164\n",
      "10:24:26 [DEBUG] evaluate generation 2517: reward = -84.84, steps = 160\n",
      "10:24:29 [DEBUG] evaluate generation 2518: reward = -84.63, steps = 152\n",
      "10:24:30 [DEBUG] evaluate generation 2519: reward = -84.92, steps = 154\n",
      "10:24:32 [DEBUG] evaluate generation 2520: reward = -84.95, steps = 160\n",
      "10:24:34 [DEBUG] evaluate generation 2521: reward = -85.52, steps = 158\n",
      "10:24:36 [DEBUG] evaluate generation 2522: reward = -84.85, steps = 161\n",
      "10:24:37 [DEBUG] evaluate generation 2523: reward = -84.75, steps = 156\n",
      "10:24:39 [DEBUG] evaluate generation 2524: reward = -85.27, steps = 171\n",
      "10:24:42 [DEBUG] evaluate generation 2525: reward = -84.55, steps = 160\n",
      "10:24:44 [DEBUG] evaluate generation 2526: reward = -84.89, steps = 160\n",
      "10:24:46 [DEBUG] evaluate generation 2527: reward = -85.83, steps = 165\n",
      "10:24:48 [DEBUG] evaluate generation 2528: reward = -84.85, steps = 159\n",
      "10:24:49 [DEBUG] evaluate generation 2529: reward = -84.66, steps = 150\n",
      "10:24:53 [DEBUG] evaluate generation 2530: reward = -84.97, steps = 155\n",
      "10:24:55 [DEBUG] evaluate generation 2531: reward = -84.80, steps = 154\n",
      "10:24:56 [DEBUG] evaluate generation 2532: reward = -84.87, steps = 156\n",
      "10:24:58 [DEBUG] evaluate generation 2533: reward = -84.73, steps = 158\n",
      "10:25:00 [DEBUG] evaluate generation 2534: reward = -85.26, steps = 168\n",
      "10:25:02 [DEBUG] evaluate generation 2535: reward = -85.01, steps = 157\n",
      "10:25:03 [DEBUG] evaluate generation 2536: reward = -84.91, steps = 152\n",
      "10:25:06 [DEBUG] evaluate generation 2537: reward = -85.08, steps = 162\n",
      "10:25:08 [DEBUG] evaluate generation 2538: reward = -85.15, steps = 152\n",
      "10:25:10 [DEBUG] evaluate generation 2539: reward = -85.03, steps = 152\n",
      "10:25:11 [DEBUG] evaluate generation 2540: reward = -85.18, steps = 162\n",
      "10:25:13 [DEBUG] evaluate generation 2541: reward = -84.80, steps = 162\n",
      "10:25:15 [DEBUG] evaluate generation 2542: reward = -85.02, steps = 153\n",
      "10:25:16 [DEBUG] evaluate generation 2543: reward = -84.83, steps = 155\n",
      "10:25:18 [DEBUG] evaluate generation 2544: reward = -85.14, steps = 157\n",
      "10:25:20 [DEBUG] evaluate generation 2545: reward = -85.07, steps = 155\n",
      "10:25:22 [DEBUG] evaluate generation 2546: reward = -84.70, steps = 158\n",
      "10:25:25 [DEBUG] evaluate generation 2547: reward = -84.68, steps = 155\n",
      "10:25:26 [DEBUG] evaluate generation 2548: reward = -84.62, steps = 157\n",
      "10:25:29 [DEBUG] evaluate generation 2549: reward = -84.84, steps = 155\n",
      "10:25:32 [DEBUG] evaluate generation 2550: reward = -84.97, steps = 157\n",
      "10:25:35 [DEBUG] evaluate generation 2551: reward = -84.51, steps = 155\n",
      "10:25:37 [DEBUG] evaluate generation 2552: reward = -85.18, steps = 154\n",
      "10:25:38 [DEBUG] evaluate generation 2553: reward = -84.47, steps = 159\n",
      "10:25:40 [DEBUG] evaluate generation 2554: reward = -84.90, steps = 158\n",
      "10:25:42 [DEBUG] evaluate generation 2555: reward = -85.10, steps = 148\n",
      "10:25:44 [DEBUG] evaluate generation 2556: reward = -85.11, steps = 153\n",
      "10:25:47 [DEBUG] evaluate generation 2557: reward = -85.09, steps = 163\n",
      "10:25:48 [DEBUG] evaluate generation 2558: reward = -84.90, steps = 165\n",
      "10:25:50 [DEBUG] evaluate generation 2559: reward = -85.15, steps = 154\n",
      "10:25:53 [DEBUG] evaluate generation 2560: reward = -85.12, steps = 146\n",
      "10:25:55 [DEBUG] evaluate generation 2561: reward = -84.83, steps = 159\n",
      "10:25:57 [DEBUG] evaluate generation 2562: reward = -84.97, steps = 151\n",
      "10:26:01 [DEBUG] evaluate generation 2563: reward = -85.08, steps = 153\n",
      "10:26:03 [DEBUG] evaluate generation 2564: reward = -84.47, steps = 152\n",
      "10:26:06 [DEBUG] evaluate generation 2565: reward = -85.34, steps = 150\n",
      "10:26:07 [DEBUG] evaluate generation 2566: reward = -84.86, steps = 155\n",
      "10:26:12 [DEBUG] evaluate generation 2567: reward = -84.60, steps = 155\n",
      "10:26:15 [DEBUG] evaluate generation 2568: reward = -85.05, steps = 160\n",
      "10:26:17 [DEBUG] evaluate generation 2569: reward = -85.13, steps = 170\n",
      "10:26:18 [DEBUG] evaluate generation 2570: reward = -84.74, steps = 148\n",
      "10:26:21 [DEBUG] evaluate generation 2571: reward = -86.46, steps = 157\n",
      "10:26:23 [DEBUG] evaluate generation 2572: reward = -85.24, steps = 161\n",
      "10:26:25 [DEBUG] evaluate generation 2573: reward = -85.24, steps = 153\n",
      "10:26:27 [DEBUG] evaluate generation 2574: reward = -85.18, steps = 154\n",
      "10:26:28 [DEBUG] evaluate generation 2575: reward = -84.86, steps = 152\n",
      "10:26:30 [DEBUG] evaluate generation 2576: reward = -85.54, steps = 149\n",
      "10:26:31 [DEBUG] evaluate generation 2577: reward = -85.04, steps = 157\n",
      "10:26:33 [DEBUG] evaluate generation 2578: reward = -84.87, steps = 146\n",
      "10:26:35 [DEBUG] evaluate generation 2579: reward = -85.24, steps = 148\n",
      "10:26:37 [DEBUG] evaluate generation 2580: reward = -85.23, steps = 152\n",
      "10:26:38 [DEBUG] evaluate generation 2581: reward = -85.28, steps = 146\n",
      "10:26:41 [DEBUG] evaluate generation 2582: reward = -85.15, steps = 147\n",
      "10:26:42 [DEBUG] evaluate generation 2583: reward = -85.02, steps = 147\n",
      "10:26:44 [DEBUG] evaluate generation 2584: reward = -85.10, steps = 161\n",
      "10:26:45 [DEBUG] evaluate generation 2585: reward = -85.00, steps = 155\n",
      "10:26:48 [DEBUG] evaluate generation 2586: reward = -85.29, steps = 153\n",
      "10:26:50 [DEBUG] evaluate generation 2587: reward = -84.86, steps = 149\n",
      "10:26:51 [DEBUG] evaluate generation 2588: reward = -85.13, steps = 151\n",
      "10:26:53 [DEBUG] evaluate generation 2589: reward = -84.90, steps = 149\n",
      "10:26:55 [DEBUG] evaluate generation 2590: reward = -85.00, steps = 156\n",
      "10:26:56 [DEBUG] evaluate generation 2591: reward = -85.22, steps = 157\n",
      "10:26:58 [DEBUG] evaluate generation 2592: reward = -84.85, steps = 153\n",
      "10:26:59 [DEBUG] evaluate generation 2593: reward = -85.27, steps = 154\n",
      "10:27:01 [DEBUG] evaluate generation 2594: reward = -84.71, steps = 152\n",
      "10:27:02 [DEBUG] evaluate generation 2595: reward = -85.27, steps = 152\n",
      "10:27:04 [DEBUG] evaluate generation 2596: reward = -85.55, steps = 171\n",
      "10:27:07 [DEBUG] evaluate generation 2597: reward = -85.21, steps = 150\n",
      "10:27:09 [DEBUG] evaluate generation 2598: reward = -85.16, steps = 150\n",
      "10:27:11 [DEBUG] evaluate generation 2599: reward = -85.15, steps = 153\n",
      "10:27:14 [DEBUG] evaluate generation 2600: reward = -84.92, steps = 160\n",
      "10:27:16 [DEBUG] evaluate generation 2601: reward = -84.78, steps = 151\n",
      "10:27:19 [DEBUG] evaluate generation 2602: reward = -84.99, steps = 149\n",
      "10:27:20 [DEBUG] evaluate generation 2603: reward = -84.98, steps = 148\n",
      "10:27:22 [DEBUG] evaluate generation 2604: reward = -85.35, steps = 145\n",
      "10:27:23 [DEBUG] evaluate generation 2605: reward = -85.36, steps = 149\n",
      "10:27:25 [DEBUG] evaluate generation 2606: reward = -84.92, steps = 152\n",
      "10:27:26 [DEBUG] evaluate generation 2607: reward = -84.96, steps = 154\n",
      "10:27:28 [DEBUG] evaluate generation 2608: reward = -85.24, steps = 151\n",
      "10:27:33 [DEBUG] evaluate generation 2609: reward = -85.45, steps = 152\n",
      "10:27:36 [DEBUG] evaluate generation 2610: reward = -85.23, steps = 150\n",
      "10:27:37 [DEBUG] evaluate generation 2611: reward = -84.92, steps = 153\n",
      "10:27:39 [DEBUG] evaluate generation 2612: reward = -85.22, steps = 151\n",
      "10:27:40 [DEBUG] evaluate generation 2613: reward = -84.91, steps = 152\n",
      "10:27:42 [DEBUG] evaluate generation 2614: reward = -84.96, steps = 152\n",
      "10:27:44 [DEBUG] evaluate generation 2615: reward = -85.24, steps = 149\n",
      "10:27:45 [DEBUG] evaluate generation 2616: reward = -85.04, steps = 150\n",
      "10:27:48 [DEBUG] evaluate generation 2617: reward = -84.89, steps = 154\n",
      "10:27:49 [DEBUG] evaluate generation 2618: reward = -84.88, steps = 152\n",
      "10:27:51 [DEBUG] evaluate generation 2619: reward = -85.12, steps = 152\n",
      "10:27:52 [DEBUG] evaluate generation 2620: reward = -84.94, steps = 152\n",
      "10:27:55 [DEBUG] evaluate generation 2621: reward = -85.00, steps = 148\n",
      "10:27:56 [DEBUG] evaluate generation 2622: reward = -84.88, steps = 152\n",
      "10:27:58 [DEBUG] evaluate generation 2623: reward = -84.80, steps = 156\n",
      "10:28:00 [DEBUG] evaluate generation 2624: reward = -84.94, steps = 146\n",
      "10:28:02 [DEBUG] evaluate generation 2625: reward = -86.06, steps = 156\n",
      "10:28:05 [DEBUG] evaluate generation 2626: reward = -84.92, steps = 149\n",
      "10:28:06 [DEBUG] evaluate generation 2627: reward = -85.12, steps = 151\n",
      "10:28:09 [DEBUG] evaluate generation 2628: reward = -85.14, steps = 149\n",
      "10:28:10 [DEBUG] evaluate generation 2629: reward = -84.68, steps = 149\n",
      "10:28:12 [DEBUG] evaluate generation 2630: reward = -85.34, steps = 157\n",
      "10:28:14 [DEBUG] evaluate generation 2631: reward = -85.55, steps = 152\n",
      "10:28:17 [DEBUG] evaluate generation 2632: reward = -85.06, steps = 150\n",
      "10:28:18 [DEBUG] evaluate generation 2633: reward = -84.95, steps = 152\n",
      "10:28:20 [DEBUG] evaluate generation 2634: reward = -85.04, steps = 149\n",
      "10:28:21 [DEBUG] evaluate generation 2635: reward = -85.09, steps = 153\n",
      "10:28:24 [DEBUG] evaluate generation 2636: reward = -84.90, steps = 150\n",
      "10:28:27 [DEBUG] evaluate generation 2637: reward = -85.43, steps = 149\n",
      "10:28:28 [DEBUG] evaluate generation 2638: reward = -85.47, steps = 148\n",
      "10:28:30 [DEBUG] evaluate generation 2639: reward = -85.03, steps = 148\n",
      "10:28:31 [DEBUG] evaluate generation 2640: reward = -85.12, steps = 149\n",
      "10:28:33 [DEBUG] evaluate generation 2641: reward = -85.03, steps = 152\n",
      "10:28:35 [DEBUG] evaluate generation 2642: reward = -85.17, steps = 147\n",
      "10:28:39 [DEBUG] evaluate generation 2643: reward = -85.23, steps = 150\n",
      "10:28:41 [DEBUG] evaluate generation 2644: reward = -84.48, steps = 153\n",
      "10:28:43 [DEBUG] evaluate generation 2645: reward = -85.34, steps = 155\n",
      "10:28:44 [DEBUG] evaluate generation 2646: reward = -85.13, steps = 148\n",
      "10:28:46 [DEBUG] evaluate generation 2647: reward = -85.58, steps = 159\n",
      "10:28:47 [DEBUG] evaluate generation 2648: reward = -86.25, steps = 153\n",
      "10:28:49 [DEBUG] evaluate generation 2649: reward = -85.00, steps = 149\n",
      "10:28:50 [DEBUG] evaluate generation 2650: reward = -85.32, steps = 152\n",
      "10:28:52 [DEBUG] evaluate generation 2651: reward = -85.05, steps = 149\n",
      "10:28:53 [DEBUG] evaluate generation 2652: reward = -85.38, steps = 156\n",
      "10:28:57 [DEBUG] evaluate generation 2653: reward = -84.75, steps = 153\n",
      "10:28:58 [DEBUG] evaluate generation 2654: reward = -85.99, steps = 153\n",
      "10:29:00 [DEBUG] evaluate generation 2655: reward = -85.35, steps = 159\n",
      "10:29:01 [DEBUG] evaluate generation 2656: reward = -85.28, steps = 152\n",
      "10:29:04 [DEBUG] evaluate generation 2657: reward = -85.13, steps = 148\n",
      "10:29:06 [DEBUG] evaluate generation 2658: reward = -85.64, steps = 148\n",
      "10:29:08 [DEBUG] evaluate generation 2659: reward = -85.04, steps = 152\n",
      "10:29:09 [DEBUG] evaluate generation 2660: reward = -84.82, steps = 153\n",
      "10:29:12 [DEBUG] evaluate generation 2661: reward = -85.20, steps = 146\n",
      "10:29:14 [DEBUG] evaluate generation 2662: reward = -84.72, steps = 153\n",
      "10:29:17 [DEBUG] evaluate generation 2663: reward = -85.10, steps = 157\n",
      "10:29:18 [DEBUG] evaluate generation 2664: reward = -85.00, steps = 153\n",
      "10:29:21 [DEBUG] evaluate generation 2665: reward = -85.11, steps = 149\n",
      "10:29:23 [DEBUG] evaluate generation 2666: reward = -84.82, steps = 146\n",
      "10:29:25 [DEBUG] evaluate generation 2667: reward = -85.08, steps = 152\n",
      "10:29:27 [DEBUG] evaluate generation 2668: reward = -84.87, steps = 145\n",
      "10:29:29 [DEBUG] evaluate generation 2669: reward = -84.60, steps = 149\n",
      "10:29:32 [DEBUG] evaluate generation 2670: reward = -85.02, steps = 157\n",
      "10:29:33 [DEBUG] evaluate generation 2671: reward = -85.05, steps = 151\n",
      "10:29:36 [DEBUG] evaluate generation 2672: reward = -84.59, steps = 156\n",
      "10:29:37 [DEBUG] evaluate generation 2673: reward = -84.93, steps = 146\n",
      "10:29:39 [DEBUG] evaluate generation 2674: reward = -84.83, steps = 152\n",
      "10:29:44 [DEBUG] evaluate generation 2675: reward = -84.31, steps = 154\n",
      "10:29:47 [DEBUG] evaluate generation 2676: reward = -84.60, steps = 151\n",
      "10:29:49 [DEBUG] evaluate generation 2677: reward = -85.70, steps = 155\n",
      "10:29:52 [DEBUG] evaluate generation 2678: reward = -84.81, steps = 156\n",
      "10:29:53 [DEBUG] evaluate generation 2679: reward = -84.76, steps = 158\n",
      "10:29:55 [DEBUG] evaluate generation 2680: reward = -85.06, steps = 165\n",
      "10:29:56 [DEBUG] evaluate generation 2681: reward = -85.19, steps = 154\n",
      "10:29:59 [DEBUG] evaluate generation 2682: reward = -84.70, steps = 155\n",
      "10:30:00 [DEBUG] evaluate generation 2683: reward = -84.91, steps = 161\n",
      "10:30:02 [DEBUG] evaluate generation 2684: reward = -84.94, steps = 158\n",
      "10:30:05 [DEBUG] evaluate generation 2685: reward = -84.80, steps = 164\n",
      "10:30:06 [DEBUG] evaluate generation 2686: reward = -84.78, steps = 158\n",
      "10:30:09 [DEBUG] evaluate generation 2687: reward = -85.00, steps = 155\n",
      "10:30:10 [DEBUG] evaluate generation 2688: reward = -84.76, steps = 154\n",
      "10:30:12 [DEBUG] evaluate generation 2689: reward = -85.07, steps = 156\n",
      "10:30:14 [DEBUG] evaluate generation 2690: reward = -84.75, steps = 162\n",
      "10:30:16 [DEBUG] evaluate generation 2691: reward = -84.40, steps = 155\n",
      "10:30:17 [DEBUG] evaluate generation 2692: reward = -85.14, steps = 156\n",
      "10:30:19 [DEBUG] evaluate generation 2693: reward = -84.35, steps = 157\n",
      "10:30:20 [DEBUG] evaluate generation 2694: reward = -84.90, steps = 146\n",
      "10:30:22 [DEBUG] evaluate generation 2695: reward = -85.20, steps = 154\n",
      "10:30:24 [DEBUG] evaluate generation 2696: reward = -85.10, steps = 147\n",
      "10:30:26 [DEBUG] evaluate generation 2697: reward = -85.14, steps = 146\n",
      "10:30:28 [DEBUG] evaluate generation 2698: reward = -85.06, steps = 161\n",
      "10:30:30 [DEBUG] evaluate generation 2699: reward = -85.11, steps = 150\n",
      "10:30:31 [DEBUG] evaluate generation 2700: reward = -85.07, steps = 155\n",
      "10:30:34 [DEBUG] evaluate generation 2701: reward = -84.85, steps = 157\n",
      "10:30:37 [DEBUG] evaluate generation 2702: reward = -84.96, steps = 152\n",
      "10:30:38 [DEBUG] evaluate generation 2703: reward = -84.69, steps = 155\n",
      "10:30:41 [DEBUG] evaluate generation 2704: reward = -84.97, steps = 152\n",
      "10:30:43 [DEBUG] evaluate generation 2705: reward = -84.96, steps = 148\n",
      "10:30:44 [DEBUG] evaluate generation 2706: reward = -84.93, steps = 161\n",
      "10:30:46 [DEBUG] evaluate generation 2707: reward = -84.91, steps = 160\n",
      "10:30:47 [DEBUG] evaluate generation 2708: reward = -84.88, steps = 156\n",
      "10:30:49 [DEBUG] evaluate generation 2709: reward = -84.73, steps = 158\n",
      "10:30:50 [DEBUG] evaluate generation 2710: reward = -84.77, steps = 152\n",
      "10:30:52 [DEBUG] evaluate generation 2711: reward = -85.23, steps = 160\n",
      "10:30:53 [DEBUG] evaluate generation 2712: reward = -84.97, steps = 157\n",
      "10:30:56 [DEBUG] evaluate generation 2713: reward = -84.38, steps = 158\n",
      "10:30:59 [DEBUG] evaluate generation 2714: reward = -84.72, steps = 155\n",
      "10:31:01 [DEBUG] evaluate generation 2715: reward = -84.40, steps = 157\n",
      "10:31:03 [DEBUG] evaluate generation 2716: reward = -84.79, steps = 162\n",
      "10:31:05 [DEBUG] evaluate generation 2717: reward = -84.71, steps = 163\n",
      "10:31:06 [DEBUG] evaluate generation 2718: reward = -84.97, steps = 162\n",
      "10:31:09 [DEBUG] evaluate generation 2719: reward = -84.49, steps = 155\n",
      "10:31:13 [DEBUG] evaluate generation 2720: reward = -84.86, steps = 152\n",
      "10:31:16 [DEBUG] evaluate generation 2721: reward = -84.80, steps = 153\n",
      "10:31:19 [DEBUG] evaluate generation 2722: reward = -84.79, steps = 159\n",
      "10:31:20 [DEBUG] evaluate generation 2723: reward = -84.72, steps = 159\n",
      "10:31:23 [DEBUG] evaluate generation 2724: reward = -84.75, steps = 158\n",
      "10:31:24 [DEBUG] evaluate generation 2725: reward = -84.51, steps = 152\n",
      "10:31:26 [DEBUG] evaluate generation 2726: reward = -84.55, steps = 157\n",
      "10:31:27 [DEBUG] evaluate generation 2727: reward = -84.59, steps = 157\n",
      "10:31:30 [DEBUG] evaluate generation 2728: reward = -84.39, steps = 152\n",
      "10:31:32 [DEBUG] evaluate generation 2729: reward = -84.87, steps = 155\n",
      "10:31:34 [DEBUG] evaluate generation 2730: reward = -84.93, steps = 151\n",
      "10:31:35 [DEBUG] evaluate generation 2731: reward = -84.62, steps = 157\n",
      "10:31:38 [DEBUG] evaluate generation 2732: reward = -84.48, steps = 154\n",
      "10:31:39 [DEBUG] evaluate generation 2733: reward = -84.47, steps = 154\n",
      "10:31:43 [DEBUG] evaluate generation 2734: reward = -85.03, steps = 154\n",
      "10:31:46 [DEBUG] evaluate generation 2735: reward = -84.70, steps = 152\n",
      "10:31:47 [DEBUG] evaluate generation 2736: reward = -85.28, steps = 162\n",
      "10:31:49 [DEBUG] evaluate generation 2737: reward = -84.80, steps = 149\n",
      "10:31:50 [DEBUG] evaluate generation 2738: reward = -84.90, steps = 153\n",
      "10:31:52 [DEBUG] evaluate generation 2739: reward = -84.96, steps = 159\n",
      "10:31:53 [DEBUG] evaluate generation 2740: reward = -84.91, steps = 161\n",
      "10:31:56 [DEBUG] evaluate generation 2741: reward = -84.61, steps = 150\n",
      "10:31:58 [DEBUG] evaluate generation 2742: reward = -85.07, steps = 150\n",
      "10:32:01 [DEBUG] evaluate generation 2743: reward = -84.83, steps = 153\n",
      "10:32:03 [DEBUG] evaluate generation 2744: reward = -84.68, steps = 160\n",
      "10:32:05 [DEBUG] evaluate generation 2745: reward = -85.38, steps = 157\n",
      "10:32:07 [DEBUG] evaluate generation 2746: reward = -84.95, steps = 151\n",
      "10:32:09 [DEBUG] evaluate generation 2747: reward = -84.97, steps = 150\n",
      "10:32:12 [DEBUG] evaluate generation 2748: reward = -84.63, steps = 159\n",
      "10:32:14 [DEBUG] evaluate generation 2749: reward = -84.76, steps = 154\n",
      "10:32:18 [DEBUG] evaluate generation 2750: reward = -84.71, steps = 156\n",
      "10:32:19 [DEBUG] evaluate generation 2751: reward = -85.49, steps = 168\n",
      "10:32:21 [DEBUG] evaluate generation 2752: reward = -84.65, steps = 155\n",
      "10:32:23 [DEBUG] evaluate generation 2753: reward = -84.89, steps = 156\n",
      "10:32:27 [DEBUG] evaluate generation 2754: reward = -85.57, steps = 161\n",
      "10:32:28 [DEBUG] evaluate generation 2755: reward = -84.85, steps = 155\n",
      "10:32:31 [DEBUG] evaluate generation 2756: reward = -85.45, steps = 162\n",
      "10:32:33 [DEBUG] evaluate generation 2757: reward = -84.94, steps = 166\n",
      "10:32:35 [DEBUG] evaluate generation 2758: reward = -84.49, steps = 156\n",
      "10:32:37 [DEBUG] evaluate generation 2759: reward = -84.99, steps = 158\n",
      "10:32:39 [DEBUG] evaluate generation 2760: reward = -84.76, steps = 154\n",
      "10:32:42 [DEBUG] evaluate generation 2761: reward = -84.80, steps = 154\n",
      "10:32:44 [DEBUG] evaluate generation 2762: reward = -84.80, steps = 162\n",
      "10:32:46 [DEBUG] evaluate generation 2763: reward = -84.80, steps = 156\n",
      "10:32:47 [DEBUG] evaluate generation 2764: reward = -84.66, steps = 156\n",
      "10:32:49 [DEBUG] evaluate generation 2765: reward = -85.03, steps = 157\n",
      "10:32:50 [DEBUG] evaluate generation 2766: reward = -85.06, steps = 163\n",
      "10:32:52 [DEBUG] evaluate generation 2767: reward = -85.05, steps = 147\n",
      "10:32:55 [DEBUG] evaluate generation 2768: reward = -84.46, steps = 157\n",
      "10:32:57 [DEBUG] evaluate generation 2769: reward = -84.80, steps = 153\n",
      "10:33:00 [DEBUG] evaluate generation 2770: reward = -84.71, steps = 155\n",
      "10:33:03 [DEBUG] evaluate generation 2771: reward = -84.96, steps = 156\n",
      "10:33:04 [DEBUG] evaluate generation 2772: reward = -84.95, steps = 152\n",
      "10:33:06 [DEBUG] evaluate generation 2773: reward = -84.83, steps = 151\n",
      "10:33:08 [DEBUG] evaluate generation 2774: reward = -85.28, steps = 161\n",
      "10:33:10 [DEBUG] evaluate generation 2775: reward = -84.57, steps = 154\n",
      "10:33:12 [DEBUG] evaluate generation 2776: reward = -85.11, steps = 163\n",
      "10:33:14 [DEBUG] evaluate generation 2777: reward = -85.01, steps = 156\n",
      "10:33:16 [DEBUG] evaluate generation 2778: reward = -84.34, steps = 158\n",
      "10:33:19 [DEBUG] evaluate generation 2779: reward = -84.89, steps = 180\n",
      "10:33:20 [DEBUG] evaluate generation 2780: reward = -85.08, steps = 163\n",
      "10:33:22 [DEBUG] evaluate generation 2781: reward = -85.15, steps = 169\n",
      "10:33:23 [DEBUG] evaluate generation 2782: reward = -84.79, steps = 156\n",
      "10:33:25 [DEBUG] evaluate generation 2783: reward = -84.70, steps = 160\n",
      "10:33:26 [DEBUG] evaluate generation 2784: reward = -84.63, steps = 154\n",
      "10:33:28 [DEBUG] evaluate generation 2785: reward = -84.13, steps = 165\n",
      "10:33:29 [DEBUG] evaluate generation 2786: reward = -84.52, steps = 157\n",
      "10:33:32 [DEBUG] evaluate generation 2787: reward = -85.30, steps = 152\n",
      "10:33:35 [DEBUG] evaluate generation 2788: reward = -85.05, steps = 162\n",
      "10:33:36 [DEBUG] evaluate generation 2789: reward = -85.05, steps = 158\n",
      "10:33:39 [DEBUG] evaluate generation 2790: reward = -84.97, steps = 154\n",
      "10:33:40 [DEBUG] evaluate generation 2791: reward = -84.85, steps = 157\n",
      "10:33:43 [DEBUG] evaluate generation 2792: reward = -85.07, steps = 161\n",
      "10:33:44 [DEBUG] evaluate generation 2793: reward = -85.41, steps = 164\n",
      "10:33:47 [DEBUG] evaluate generation 2794: reward = -84.85, steps = 156\n",
      "10:33:48 [DEBUG] evaluate generation 2795: reward = -85.27, steps = 165\n",
      "10:33:50 [DEBUG] evaluate generation 2796: reward = -84.99, steps = 156\n",
      "10:33:53 [DEBUG] evaluate generation 2797: reward = -85.09, steps = 159\n",
      "10:33:55 [DEBUG] evaluate generation 2798: reward = -84.96, steps = 155\n",
      "10:33:56 [DEBUG] evaluate generation 2799: reward = -84.98, steps = 154\n",
      "10:33:59 [DEBUG] evaluate generation 2800: reward = -84.99, steps = 156\n",
      "10:34:00 [DEBUG] evaluate generation 2801: reward = -85.68, steps = 156\n",
      "10:34:02 [DEBUG] evaluate generation 2802: reward = -84.86, steps = 158\n",
      "10:34:04 [DEBUG] evaluate generation 2803: reward = -85.43, steps = 157\n",
      "10:34:06 [DEBUG] evaluate generation 2804: reward = -85.15, steps = 151\n",
      "10:34:08 [DEBUG] evaluate generation 2805: reward = -85.03, steps = 154\n",
      "10:34:09 [DEBUG] evaluate generation 2806: reward = -84.93, steps = 153\n",
      "10:34:11 [DEBUG] evaluate generation 2807: reward = -84.60, steps = 153\n",
      "10:34:13 [DEBUG] evaluate generation 2808: reward = -84.54, steps = 157\n",
      "10:34:15 [DEBUG] evaluate generation 2809: reward = -84.91, steps = 158\n",
      "10:34:18 [DEBUG] evaluate generation 2810: reward = -84.70, steps = 152\n",
      "10:34:20 [DEBUG] evaluate generation 2811: reward = -84.71, steps = 156\n",
      "10:34:23 [DEBUG] evaluate generation 2812: reward = -84.65, steps = 152\n",
      "10:34:24 [DEBUG] evaluate generation 2813: reward = -84.60, steps = 158\n",
      "10:34:26 [DEBUG] evaluate generation 2814: reward = -85.13, steps = 153\n",
      "10:34:28 [DEBUG] evaluate generation 2815: reward = -85.05, steps = 156\n",
      "10:34:32 [DEBUG] evaluate generation 2816: reward = -84.89, steps = 159\n",
      "10:34:33 [DEBUG] evaluate generation 2817: reward = -84.80, steps = 156\n",
      "10:34:35 [DEBUG] evaluate generation 2818: reward = -84.91, steps = 151\n",
      "10:34:36 [DEBUG] evaluate generation 2819: reward = -84.94, steps = 148\n",
      "10:34:39 [DEBUG] evaluate generation 2820: reward = -84.91, steps = 156\n",
      "10:34:41 [DEBUG] evaluate generation 2821: reward = -84.78, steps = 154\n",
      "10:34:43 [DEBUG] evaluate generation 2822: reward = -84.69, steps = 157\n",
      "10:34:45 [DEBUG] evaluate generation 2823: reward = -4.35, steps = 1600\n",
      "10:34:47 [DEBUG] evaluate generation 2824: reward = -84.85, steps = 154\n",
      "10:34:49 [DEBUG] evaluate generation 2825: reward = -84.63, steps = 155\n",
      "10:34:53 [DEBUG] evaluate generation 2826: reward = -84.60, steps = 155\n",
      "10:34:54 [DEBUG] evaluate generation 2827: reward = -84.95, steps = 157\n",
      "10:34:56 [DEBUG] evaluate generation 2828: reward = -84.90, steps = 153\n",
      "10:34:58 [DEBUG] evaluate generation 2829: reward = -85.04, steps = 158\n",
      "10:35:00 [DEBUG] evaluate generation 2830: reward = -85.40, steps = 157\n",
      "10:35:02 [DEBUG] evaluate generation 2831: reward = -85.24, steps = 161\n",
      "10:35:04 [DEBUG] evaluate generation 2832: reward = -85.08, steps = 156\n",
      "10:35:06 [DEBUG] evaluate generation 2833: reward = -84.60, steps = 153\n",
      "10:35:08 [DEBUG] evaluate generation 2834: reward = -84.99, steps = 156\n",
      "10:35:09 [DEBUG] evaluate generation 2835: reward = -84.95, steps = 155\n",
      "10:35:11 [DEBUG] evaluate generation 2836: reward = -84.61, steps = 152\n",
      "10:35:12 [DEBUG] evaluate generation 2837: reward = -84.98, steps = 157\n",
      "10:35:14 [DEBUG] evaluate generation 2838: reward = -84.73, steps = 153\n",
      "10:35:16 [DEBUG] evaluate generation 2839: reward = -84.97, steps = 154\n",
      "10:35:19 [DEBUG] evaluate generation 2840: reward = -84.65, steps = 160\n",
      "10:35:21 [DEBUG] evaluate generation 2841: reward = -85.16, steps = 154\n",
      "10:35:24 [DEBUG] evaluate generation 2842: reward = -84.93, steps = 158\n",
      "10:35:26 [DEBUG] evaluate generation 2843: reward = -84.53, steps = 152\n",
      "10:35:28 [DEBUG] evaluate generation 2844: reward = -84.91, steps = 160\n",
      "10:35:29 [DEBUG] evaluate generation 2845: reward = -84.76, steps = 153\n",
      "10:35:31 [DEBUG] evaluate generation 2846: reward = -84.48, steps = 154\n",
      "10:35:33 [DEBUG] evaluate generation 2847: reward = -84.38, steps = 160\n",
      "10:35:35 [DEBUG] evaluate generation 2848: reward = -84.93, steps = 158\n",
      "10:35:36 [DEBUG] evaluate generation 2849: reward = -84.46, steps = 158\n",
      "10:35:38 [DEBUG] evaluate generation 2850: reward = -84.50, steps = 164\n",
      "10:35:41 [DEBUG] evaluate generation 2851: reward = -84.78, steps = 157\n",
      "10:35:42 [DEBUG] evaluate generation 2852: reward = -85.26, steps = 153\n",
      "10:35:44 [DEBUG] evaluate generation 2853: reward = -85.15, steps = 152\n",
      "10:35:46 [DEBUG] evaluate generation 2854: reward = -84.56, steps = 161\n",
      "10:35:47 [DEBUG] evaluate generation 2855: reward = -84.84, steps = 155\n",
      "10:35:49 [DEBUG] evaluate generation 2856: reward = -84.81, steps = 163\n",
      "10:35:52 [DEBUG] evaluate generation 2857: reward = -84.93, steps = 152\n",
      "10:35:54 [DEBUG] evaluate generation 2858: reward = -84.41, steps = 162\n",
      "10:35:55 [DEBUG] evaluate generation 2859: reward = -85.17, steps = 156\n",
      "10:35:57 [DEBUG] evaluate generation 2860: reward = -84.39, steps = 157\n",
      "10:35:58 [DEBUG] evaluate generation 2861: reward = -84.72, steps = 154\n",
      "10:36:01 [DEBUG] evaluate generation 2862: reward = -84.84, steps = 160\n",
      "10:36:02 [DEBUG] evaluate generation 2863: reward = -84.74, steps = 157\n",
      "10:36:05 [DEBUG] evaluate generation 2864: reward = -84.38, steps = 155\n",
      "10:36:06 [DEBUG] evaluate generation 2865: reward = -84.64, steps = 155\n",
      "10:36:08 [DEBUG] evaluate generation 2866: reward = -84.74, steps = 153\n",
      "10:36:11 [DEBUG] evaluate generation 2867: reward = -84.25, steps = 165\n",
      "10:36:12 [DEBUG] evaluate generation 2868: reward = -85.07, steps = 164\n",
      "10:36:16 [DEBUG] evaluate generation 2869: reward = -84.68, steps = 153\n",
      "10:36:19 [DEBUG] evaluate generation 2870: reward = -84.73, steps = 162\n",
      "10:36:22 [DEBUG] evaluate generation 2871: reward = -84.90, steps = 156\n",
      "10:36:24 [DEBUG] evaluate generation 2872: reward = -84.86, steps = 163\n",
      "10:36:26 [DEBUG] evaluate generation 2873: reward = -84.85, steps = 166\n",
      "10:36:27 [DEBUG] evaluate generation 2874: reward = -84.67, steps = 158\n",
      "10:36:29 [DEBUG] evaluate generation 2875: reward = -84.35, steps = 158\n",
      "10:36:31 [DEBUG] evaluate generation 2876: reward = -85.33, steps = 164\n",
      "10:36:34 [DEBUG] evaluate generation 2877: reward = -0.91, steps = 1600\n",
      "10:36:35 [DEBUG] evaluate generation 2878: reward = -85.14, steps = 157\n",
      "10:36:37 [DEBUG] evaluate generation 2879: reward = -84.50, steps = 157\n",
      "10:36:39 [DEBUG] evaluate generation 2880: reward = -84.81, steps = 151\n",
      "10:36:41 [DEBUG] evaluate generation 2881: reward = -85.01, steps = 158\n",
      "10:36:46 [DEBUG] evaluate generation 2882: reward = -84.96, steps = 159\n",
      "10:36:48 [DEBUG] evaluate generation 2883: reward = -84.88, steps = 157\n",
      "10:36:51 [DEBUG] evaluate generation 2884: reward = -85.01, steps = 164\n",
      "10:36:52 [DEBUG] evaluate generation 2885: reward = -84.74, steps = 156\n",
      "10:36:54 [DEBUG] evaluate generation 2886: reward = -84.92, steps = 155\n",
      "10:36:56 [DEBUG] evaluate generation 2887: reward = -85.08, steps = 165\n",
      "10:36:57 [DEBUG] evaluate generation 2888: reward = -84.57, steps = 153\n",
      "10:36:59 [DEBUG] evaluate generation 2889: reward = -84.64, steps = 154\n",
      "10:37:02 [DEBUG] evaluate generation 2890: reward = -84.87, steps = 155\n",
      "10:37:04 [DEBUG] evaluate generation 2891: reward = -84.98, steps = 159\n",
      "10:37:07 [DEBUG] evaluate generation 2892: reward = -84.62, steps = 154\n",
      "10:37:09 [DEBUG] evaluate generation 2893: reward = -85.28, steps = 152\n",
      "10:37:10 [DEBUG] evaluate generation 2894: reward = -84.70, steps = 156\n",
      "10:37:12 [DEBUG] evaluate generation 2895: reward = -84.35, steps = 156\n",
      "10:37:15 [DEBUG] evaluate generation 2896: reward = -84.45, steps = 154\n",
      "10:37:17 [DEBUG] evaluate generation 2897: reward = -84.91, steps = 150\n",
      "10:37:20 [DEBUG] evaluate generation 2898: reward = -84.44, steps = 159\n",
      "10:37:22 [DEBUG] evaluate generation 2899: reward = -84.51, steps = 157\n",
      "10:37:23 [DEBUG] evaluate generation 2900: reward = -84.64, steps = 159\n",
      "10:37:25 [DEBUG] evaluate generation 2901: reward = -84.85, steps = 157\n",
      "10:37:27 [DEBUG] evaluate generation 2902: reward = -84.55, steps = 160\n",
      "10:37:29 [DEBUG] evaluate generation 2903: reward = -84.51, steps = 157\n",
      "10:37:32 [DEBUG] evaluate generation 2904: reward = -84.55, steps = 159\n",
      "10:37:33 [DEBUG] evaluate generation 2905: reward = -85.14, steps = 160\n",
      "10:37:35 [DEBUG] evaluate generation 2906: reward = -84.68, steps = 153\n",
      "10:37:38 [DEBUG] evaluate generation 2907: reward = -84.64, steps = 157\n",
      "10:37:39 [DEBUG] evaluate generation 2908: reward = -84.61, steps = 159\n",
      "10:37:41 [DEBUG] evaluate generation 2909: reward = -84.64, steps = 161\n",
      "10:37:42 [DEBUG] evaluate generation 2910: reward = -84.15, steps = 154\n",
      "10:37:45 [DEBUG] evaluate generation 2911: reward = -84.64, steps = 153\n",
      "10:37:46 [DEBUG] evaluate generation 2912: reward = -84.51, steps = 153\n",
      "10:37:48 [DEBUG] evaluate generation 2913: reward = -84.54, steps = 156\n",
      "10:37:50 [DEBUG] evaluate generation 2914: reward = -84.77, steps = 155\n",
      "10:37:51 [DEBUG] evaluate generation 2915: reward = -84.76, steps = 157\n",
      "10:37:53 [DEBUG] evaluate generation 2916: reward = -85.15, steps = 158\n",
      "10:37:55 [DEBUG] evaluate generation 2917: reward = -84.33, steps = 152\n",
      "10:37:58 [DEBUG] evaluate generation 2918: reward = -84.51, steps = 150\n",
      "10:37:59 [DEBUG] evaluate generation 2919: reward = -84.97, steps = 151\n",
      "10:38:01 [DEBUG] evaluate generation 2920: reward = -85.26, steps = 157\n",
      "10:38:02 [DEBUG] evaluate generation 2921: reward = -84.97, steps = 154\n",
      "10:38:04 [DEBUG] evaluate generation 2922: reward = -84.33, steps = 153\n",
      "10:38:05 [DEBUG] evaluate generation 2923: reward = -84.67, steps = 155\n",
      "10:38:07 [DEBUG] evaluate generation 2924: reward = -84.19, steps = 157\n",
      "10:38:09 [DEBUG] evaluate generation 2925: reward = -84.69, steps = 152\n",
      "10:38:12 [DEBUG] evaluate generation 2926: reward = -84.94, steps = 161\n",
      "10:38:14 [DEBUG] evaluate generation 2927: reward = -84.32, steps = 162\n",
      "10:38:17 [DEBUG] evaluate generation 2928: reward = -84.88, steps = 151\n",
      "10:38:18 [DEBUG] evaluate generation 2929: reward = -84.27, steps = 154\n",
      "10:38:20 [DEBUG] evaluate generation 2930: reward = -84.80, steps = 152\n",
      "10:38:21 [DEBUG] evaluate generation 2931: reward = -84.70, steps = 156\n",
      "10:38:25 [DEBUG] evaluate generation 2932: reward = -84.53, steps = 151\n",
      "10:38:28 [DEBUG] evaluate generation 2933: reward = -85.28, steps = 151\n",
      "10:38:29 [DEBUG] evaluate generation 2934: reward = -84.13, steps = 156\n",
      "10:38:33 [DEBUG] evaluate generation 2935: reward = -84.29, steps = 171\n",
      "10:38:34 [DEBUG] evaluate generation 2936: reward = -85.00, steps = 153\n",
      "10:38:37 [DEBUG] evaluate generation 2937: reward = -84.55, steps = 152\n",
      "10:38:39 [DEBUG] evaluate generation 2938: reward = -85.29, steps = 165\n",
      "10:38:42 [DEBUG] evaluate generation 2939: reward = -84.56, steps = 155\n",
      "10:38:44 [DEBUG] evaluate generation 2940: reward = -84.53, steps = 155\n",
      "10:38:45 [DEBUG] evaluate generation 2941: reward = -84.81, steps = 155\n",
      "10:38:47 [DEBUG] evaluate generation 2942: reward = -84.08, steps = 152\n",
      "10:38:49 [DEBUG] evaluate generation 2943: reward = -84.20, steps = 154\n",
      "10:38:52 [DEBUG] evaluate generation 2944: reward = -84.51, steps = 151\n",
      "10:38:53 [DEBUG] evaluate generation 2945: reward = -84.49, steps = 154\n",
      "10:38:55 [DEBUG] evaluate generation 2946: reward = -84.87, steps = 160\n",
      "10:38:57 [DEBUG] evaluate generation 2947: reward = -85.04, steps = 152\n",
      "10:38:59 [DEBUG] evaluate generation 2948: reward = -84.33, steps = 157\n",
      "10:39:02 [DEBUG] evaluate generation 2949: reward = -84.62, steps = 153\n",
      "10:39:03 [DEBUG] evaluate generation 2950: reward = -84.77, steps = 155\n",
      "10:39:05 [DEBUG] evaluate generation 2951: reward = -85.04, steps = 158\n",
      "10:39:06 [DEBUG] evaluate generation 2952: reward = -84.33, steps = 151\n",
      "10:39:08 [DEBUG] evaluate generation 2953: reward = -85.29, steps = 163\n",
      "10:39:09 [DEBUG] evaluate generation 2954: reward = -84.79, steps = 148\n",
      "10:39:11 [DEBUG] evaluate generation 2955: reward = -84.98, steps = 153\n",
      "10:39:14 [DEBUG] evaluate generation 2956: reward = -84.75, steps = 156\n",
      "10:39:15 [DEBUG] evaluate generation 2957: reward = -84.44, steps = 156\n",
      "10:39:17 [DEBUG] evaluate generation 2958: reward = -85.29, steps = 154\n",
      "10:39:18 [DEBUG] evaluate generation 2959: reward = -84.86, steps = 156\n",
      "10:39:20 [DEBUG] evaluate generation 2960: reward = -84.62, steps = 158\n",
      "10:39:22 [DEBUG] evaluate generation 2961: reward = -84.32, steps = 150\n",
      "10:39:24 [DEBUG] evaluate generation 2962: reward = -85.60, steps = 164\n",
      "10:39:26 [DEBUG] evaluate generation 2963: reward = -84.52, steps = 146\n",
      "10:39:28 [DEBUG] evaluate generation 2964: reward = -84.34, steps = 150\n",
      "10:39:31 [DEBUG] evaluate generation 2965: reward = -84.83, steps = 159\n",
      "10:39:33 [DEBUG] evaluate generation 2966: reward = -84.45, steps = 149\n",
      "10:39:36 [DEBUG] evaluate generation 2967: reward = -84.81, steps = 148\n",
      "10:39:38 [DEBUG] evaluate generation 2968: reward = -84.34, steps = 149\n",
      "10:39:41 [DEBUG] evaluate generation 2969: reward = -84.81, steps = 156\n",
      "10:39:42 [DEBUG] evaluate generation 2970: reward = -85.08, steps = 154\n",
      "10:39:44 [DEBUG] evaluate generation 2971: reward = -84.55, steps = 157\n",
      "10:39:45 [DEBUG] evaluate generation 2972: reward = -84.82, steps = 163\n",
      "10:39:47 [DEBUG] evaluate generation 2973: reward = -85.19, steps = 153\n",
      "10:39:49 [DEBUG] evaluate generation 2974: reward = -84.45, steps = 155\n",
      "10:39:52 [DEBUG] evaluate generation 2975: reward = -84.92, steps = 155\n",
      "10:39:54 [DEBUG] evaluate generation 2976: reward = -84.75, steps = 158\n",
      "10:39:56 [DEBUG] evaluate generation 2977: reward = -84.79, steps = 154\n",
      "10:39:57 [DEBUG] evaluate generation 2978: reward = -86.52, steps = 159\n",
      "10:39:59 [DEBUG] evaluate generation 2979: reward = -84.44, steps = 151\n",
      "10:40:01 [DEBUG] evaluate generation 2980: reward = -84.95, steps = 162\n",
      "10:40:02 [DEBUG] evaluate generation 2981: reward = -84.48, steps = 152\n",
      "10:40:04 [DEBUG] evaluate generation 2982: reward = -84.93, steps = 153\n",
      "10:40:05 [DEBUG] evaluate generation 2983: reward = -84.74, steps = 152\n",
      "10:40:07 [DEBUG] evaluate generation 2984: reward = -84.61, steps = 155\n",
      "10:40:09 [DEBUG] evaluate generation 2985: reward = -84.30, steps = 152\n",
      "10:40:11 [DEBUG] evaluate generation 2986: reward = -84.42, steps = 154\n",
      "10:40:13 [DEBUG] evaluate generation 2987: reward = -84.60, steps = 155\n",
      "10:40:15 [DEBUG] evaluate generation 2988: reward = -85.09, steps = 156\n",
      "10:40:16 [DEBUG] evaluate generation 2989: reward = -85.01, steps = 158\n",
      "10:40:18 [DEBUG] evaluate generation 2990: reward = -85.24, steps = 163\n",
      "10:40:21 [DEBUG] evaluate generation 2991: reward = -84.83, steps = 159\n",
      "10:40:23 [DEBUG] evaluate generation 2992: reward = -84.93, steps = 153\n",
      "10:40:26 [DEBUG] evaluate generation 2993: reward = -84.94, steps = 149\n",
      "10:40:28 [DEBUG] evaluate generation 2994: reward = -84.50, steps = 153\n",
      "10:40:29 [DEBUG] evaluate generation 2995: reward = -85.01, steps = 156\n",
      "10:40:31 [DEBUG] evaluate generation 2996: reward = -85.02, steps = 157\n",
      "10:40:32 [DEBUG] evaluate generation 2997: reward = -84.73, steps = 154\n",
      "10:40:34 [DEBUG] evaluate generation 2998: reward = -85.05, steps = 154\n",
      "10:40:35 [DEBUG] evaluate generation 2999: reward = -85.09, steps = 150\n",
      "10:40:38 [DEBUG] evaluate generation 3000: reward = -85.09, steps = 152\n",
      "10:40:39 [DEBUG] evaluate generation 3001: reward = -85.20, steps = 150\n",
      "10:40:41 [DEBUG] evaluate generation 3002: reward = -84.91, steps = 155\n",
      "10:40:42 [DEBUG] evaluate generation 3003: reward = -84.37, steps = 149\n",
      "10:40:45 [DEBUG] evaluate generation 3004: reward = -84.63, steps = 155\n",
      "10:40:46 [DEBUG] evaluate generation 3005: reward = -84.65, steps = 153\n",
      "10:40:48 [DEBUG] evaluate generation 3006: reward = -84.51, steps = 150\n",
      "10:40:50 [DEBUG] evaluate generation 3007: reward = -84.80, steps = 148\n",
      "10:40:51 [DEBUG] evaluate generation 3008: reward = -84.92, steps = 152\n",
      "10:40:54 [DEBUG] evaluate generation 3009: reward = -85.34, steps = 151\n",
      "10:40:56 [DEBUG] evaluate generation 3010: reward = -84.37, steps = 150\n",
      "10:40:58 [DEBUG] evaluate generation 3011: reward = -84.54, steps = 164\n",
      "10:40:59 [DEBUG] evaluate generation 3012: reward = -84.35, steps = 147\n",
      "10:41:01 [DEBUG] evaluate generation 3013: reward = -84.51, steps = 149\n",
      "10:41:04 [DEBUG] evaluate generation 3014: reward = -84.38, steps = 149\n",
      "10:41:06 [DEBUG] evaluate generation 3015: reward = -84.41, steps = 149\n",
      "10:41:08 [DEBUG] evaluate generation 3016: reward = -84.51, steps = 143\n",
      "10:41:10 [DEBUG] evaluate generation 3017: reward = -85.14, steps = 150\n",
      "10:41:12 [DEBUG] evaluate generation 3018: reward = -84.90, steps = 151\n",
      "10:41:13 [DEBUG] evaluate generation 3019: reward = -84.71, steps = 157\n",
      "10:41:15 [DEBUG] evaluate generation 3020: reward = -83.81, steps = 152\n",
      "10:41:17 [DEBUG] evaluate generation 3021: reward = -85.19, steps = 151\n",
      "10:41:20 [DEBUG] evaluate generation 3022: reward = -83.89, steps = 154\n",
      "10:41:22 [DEBUG] evaluate generation 3023: reward = -85.32, steps = 144\n",
      "10:41:25 [DEBUG] evaluate generation 3024: reward = -83.94, steps = 153\n",
      "10:41:27 [DEBUG] evaluate generation 3025: reward = -85.00, steps = 149\n",
      "10:41:29 [DEBUG] evaluate generation 3026: reward = -84.22, steps = 157\n",
      "10:41:30 [DEBUG] evaluate generation 3027: reward = -84.77, steps = 151\n",
      "10:41:32 [DEBUG] evaluate generation 3028: reward = -84.39, steps = 151\n",
      "10:41:34 [DEBUG] evaluate generation 3029: reward = -83.01, steps = 158\n",
      "10:41:36 [DEBUG] evaluate generation 3030: reward = -83.69, steps = 158\n",
      "10:41:38 [DEBUG] evaluate generation 3031: reward = -84.61, steps = 157\n",
      "10:41:39 [DEBUG] evaluate generation 3032: reward = -84.77, steps = 150\n",
      "10:41:42 [DEBUG] evaluate generation 3033: reward = -83.72, steps = 155\n",
      "10:41:43 [DEBUG] evaluate generation 3034: reward = -83.90, steps = 151\n",
      "10:41:45 [DEBUG] evaluate generation 3035: reward = -83.62, steps = 156\n",
      "10:41:46 [DEBUG] evaluate generation 3036: reward = -83.83, steps = 153\n",
      "10:41:48 [DEBUG] evaluate generation 3037: reward = -84.11, steps = 171\n",
      "10:41:49 [DEBUG] evaluate generation 3038: reward = -84.83, steps = 162\n",
      "10:41:51 [DEBUG] evaluate generation 3039: reward = -84.46, steps = 144\n",
      "10:41:53 [DEBUG] evaluate generation 3040: reward = -85.08, steps = 146\n",
      "10:41:54 [DEBUG] evaluate generation 3041: reward = -83.98, steps = 147\n",
      "10:41:56 [DEBUG] evaluate generation 3042: reward = -84.21, steps = 147\n",
      "10:41:58 [DEBUG] evaluate generation 3043: reward = -84.69, steps = 144\n",
      "10:41:59 [DEBUG] evaluate generation 3044: reward = -84.38, steps = 145\n",
      "10:42:02 [DEBUG] evaluate generation 3045: reward = -83.15, steps = 153\n",
      "10:42:03 [DEBUG] evaluate generation 3046: reward = -83.68, steps = 150\n",
      "10:42:05 [DEBUG] evaluate generation 3047: reward = -85.19, steps = 147\n",
      "10:42:07 [DEBUG] evaluate generation 3048: reward = -82.40, steps = 157\n",
      "10:42:09 [DEBUG] evaluate generation 3049: reward = -84.14, steps = 153\n",
      "10:42:11 [DEBUG] evaluate generation 3050: reward = -83.83, steps = 155\n",
      "10:42:12 [DEBUG] evaluate generation 3051: reward = -82.55, steps = 158\n",
      "10:42:15 [DEBUG] evaluate generation 3052: reward = -83.61, steps = 155\n",
      "10:42:18 [DEBUG] evaluate generation 3053: reward = -83.85, steps = 157\n",
      "10:42:20 [DEBUG] evaluate generation 3054: reward = -86.01, steps = 145\n",
      "10:42:21 [DEBUG] evaluate generation 3055: reward = -84.33, steps = 151\n",
      "10:42:23 [DEBUG] evaluate generation 3056: reward = -83.79, steps = 156\n",
      "10:42:25 [DEBUG] evaluate generation 3057: reward = -85.55, steps = 145\n",
      "10:42:26 [DEBUG] evaluate generation 3058: reward = -84.34, steps = 153\n",
      "10:42:28 [DEBUG] evaluate generation 3059: reward = -85.38, steps = 144\n",
      "10:42:30 [DEBUG] evaluate generation 3060: reward = -84.73, steps = 152\n",
      "10:42:33 [DEBUG] evaluate generation 3061: reward = -84.63, steps = 152\n",
      "10:42:34 [DEBUG] evaluate generation 3062: reward = -84.32, steps = 152\n",
      "10:42:36 [DEBUG] evaluate generation 3063: reward = -83.36, steps = 149\n",
      "10:42:38 [DEBUG] evaluate generation 3064: reward = -83.66, steps = 150\n",
      "10:42:40 [DEBUG] evaluate generation 3065: reward = -84.82, steps = 143\n",
      "10:42:42 [DEBUG] evaluate generation 3066: reward = -84.23, steps = 146\n",
      "10:42:43 [DEBUG] evaluate generation 3067: reward = -84.45, steps = 145\n",
      "10:42:45 [DEBUG] evaluate generation 3068: reward = -84.24, steps = 152\n",
      "10:42:48 [DEBUG] evaluate generation 3069: reward = -84.90, steps = 141\n",
      "10:42:49 [DEBUG] evaluate generation 3070: reward = -83.50, steps = 153\n",
      "10:42:51 [DEBUG] evaluate generation 3071: reward = -83.42, steps = 161\n",
      "10:42:52 [DEBUG] evaluate generation 3072: reward = -85.12, steps = 154\n",
      "10:42:54 [DEBUG] evaluate generation 3073: reward = -83.09, steps = 153\n",
      "10:42:55 [DEBUG] evaluate generation 3074: reward = -84.21, steps = 150\n",
      "10:42:57 [DEBUG] evaluate generation 3075: reward = -84.27, steps = 158\n",
      "10:43:00 [DEBUG] evaluate generation 3076: reward = -81.89, steps = 160\n",
      "10:43:01 [DEBUG] evaluate generation 3077: reward = -83.40, steps = 160\n",
      "10:43:03 [DEBUG] evaluate generation 3078: reward = -83.78, steps = 157\n",
      "10:43:05 [DEBUG] evaluate generation 3079: reward = -84.10, steps = 152\n",
      "10:43:06 [DEBUG] evaluate generation 3080: reward = -85.32, steps = 146\n",
      "10:43:08 [DEBUG] evaluate generation 3081: reward = -82.81, steps = 157\n",
      "10:43:10 [DEBUG] evaluate generation 3082: reward = -83.02, steps = 157\n",
      "10:43:13 [DEBUG] evaluate generation 3083: reward = -82.48, steps = 154\n",
      "10:43:14 [DEBUG] evaluate generation 3084: reward = -82.33, steps = 160\n",
      "10:43:17 [DEBUG] evaluate generation 3085: reward = -82.59, steps = 155\n",
      "10:43:18 [DEBUG] evaluate generation 3086: reward = -83.84, steps = 157\n",
      "10:43:20 [DEBUG] evaluate generation 3087: reward = -81.30, steps = 156\n",
      "10:43:23 [DEBUG] evaluate generation 3088: reward = -83.65, steps = 151\n",
      "10:43:25 [DEBUG] evaluate generation 3089: reward = -85.13, steps = 151\n",
      "10:43:27 [DEBUG] evaluate generation 3090: reward = -81.76, steps = 153\n",
      "10:43:29 [DEBUG] evaluate generation 3091: reward = -85.53, steps = 153\n",
      "10:43:31 [DEBUG] evaluate generation 3092: reward = -82.99, steps = 150\n",
      "10:43:33 [DEBUG] evaluate generation 3093: reward = -81.36, steps = 157\n",
      "10:43:34 [DEBUG] evaluate generation 3094: reward = -81.29, steps = 158\n",
      "10:43:36 [DEBUG] evaluate generation 3095: reward = -84.07, steps = 150\n",
      "10:43:37 [DEBUG] evaluate generation 3096: reward = -81.66, steps = 151\n",
      "10:43:38 [DEBUG] evaluate generation 3097: reward = -80.20, steps = 157\n",
      "10:43:40 [DEBUG] evaluate generation 3098: reward = -82.00, steps = 151\n",
      "10:43:41 [DEBUG] evaluate generation 3099: reward = -80.31, steps = 156\n",
      "10:43:43 [DEBUG] evaluate generation 3100: reward = -85.20, steps = 157\n",
      "10:43:45 [DEBUG] evaluate generation 3101: reward = -80.99, steps = 156\n",
      "10:43:47 [DEBUG] evaluate generation 3102: reward = -82.39, steps = 154\n",
      "10:43:48 [DEBUG] evaluate generation 3103: reward = -84.22, steps = 152\n",
      "10:43:50 [DEBUG] evaluate generation 3104: reward = -80.82, steps = 153\n",
      "10:43:52 [DEBUG] evaluate generation 3105: reward = -80.68, steps = 155\n",
      "10:43:53 [DEBUG] evaluate generation 3106: reward = -81.12, steps = 153\n",
      "10:43:56 [DEBUG] evaluate generation 3107: reward = -84.12, steps = 149\n",
      "10:43:59 [DEBUG] evaluate generation 3108: reward = -82.70, steps = 156\n",
      "10:44:01 [DEBUG] evaluate generation 3109: reward = -82.98, steps = 152\n",
      "10:44:02 [DEBUG] evaluate generation 3110: reward = -82.73, steps = 153\n",
      "10:44:04 [DEBUG] evaluate generation 3111: reward = -83.72, steps = 159\n",
      "10:44:06 [DEBUG] evaluate generation 3112: reward = -85.13, steps = 167\n",
      "10:44:08 [DEBUG] evaluate generation 3113: reward = -83.37, steps = 153\n",
      "10:44:10 [DEBUG] evaluate generation 3114: reward = -81.49, steps = 163\n",
      "10:44:11 [DEBUG] evaluate generation 3115: reward = -82.79, steps = 162\n",
      "10:44:13 [DEBUG] evaluate generation 3116: reward = -82.39, steps = 163\n",
      "10:44:14 [DEBUG] evaluate generation 3117: reward = -82.30, steps = 162\n",
      "10:44:16 [DEBUG] evaluate generation 3118: reward = -81.14, steps = 158\n",
      "10:44:18 [DEBUG] evaluate generation 3119: reward = -79.74, steps = 160\n",
      "10:44:20 [DEBUG] evaluate generation 3120: reward = -82.97, steps = 170\n",
      "10:44:21 [DEBUG] evaluate generation 3121: reward = -78.22, steps = 164\n",
      "10:44:24 [DEBUG] evaluate generation 3122: reward = -81.91, steps = 160\n",
      "10:44:26 [DEBUG] evaluate generation 3123: reward = -82.06, steps = 159\n",
      "10:44:28 [DEBUG] evaluate generation 3124: reward = -84.90, steps = 149\n",
      "10:44:30 [DEBUG] evaluate generation 3125: reward = -84.69, steps = 152\n",
      "10:44:31 [DEBUG] evaluate generation 3126: reward = -83.07, steps = 151\n",
      "10:44:33 [DEBUG] evaluate generation 3127: reward = -81.72, steps = 164\n",
      "10:44:35 [DEBUG] evaluate generation 3128: reward = -81.71, steps = 165\n",
      "10:44:36 [DEBUG] evaluate generation 3129: reward = -86.05, steps = 142\n",
      "10:44:39 [DEBUG] evaluate generation 3130: reward = -78.58, steps = 164\n",
      "10:44:40 [DEBUG] evaluate generation 3131: reward = -81.73, steps = 161\n",
      "10:44:42 [DEBUG] evaluate generation 3132: reward = -82.29, steps = 153\n",
      "10:44:44 [DEBUG] evaluate generation 3133: reward = -84.54, steps = 155\n",
      "10:44:46 [DEBUG] evaluate generation 3134: reward = -82.41, steps = 151\n",
      "10:44:48 [DEBUG] evaluate generation 3135: reward = -82.72, steps = 149\n",
      "10:44:50 [DEBUG] evaluate generation 3136: reward = -81.79, steps = 151\n",
      "10:44:52 [DEBUG] evaluate generation 3137: reward = -79.54, steps = 160\n",
      "10:44:55 [DEBUG] evaluate generation 3138: reward = -83.04, steps = 151\n",
      "10:44:56 [DEBUG] evaluate generation 3139: reward = -83.83, steps = 144\n",
      "10:44:58 [DEBUG] evaluate generation 3140: reward = -82.05, steps = 154\n",
      "10:45:00 [DEBUG] evaluate generation 3141: reward = -83.28, steps = 150\n",
      "10:45:01 [DEBUG] evaluate generation 3142: reward = -82.76, steps = 147\n",
      "10:45:03 [DEBUG] evaluate generation 3143: reward = -85.33, steps = 144\n",
      "10:45:04 [DEBUG] evaluate generation 3144: reward = -79.99, steps = 156\n",
      "10:45:06 [DEBUG] evaluate generation 3145: reward = -77.33, steps = 162\n",
      "10:45:08 [DEBUG] evaluate generation 3146: reward = -77.51, steps = 161\n",
      "10:45:09 [DEBUG] evaluate generation 3147: reward = -82.43, steps = 146\n",
      "10:45:12 [DEBUG] evaluate generation 3148: reward = -83.11, steps = 147\n",
      "10:45:14 [DEBUG] evaluate generation 3149: reward = -82.08, steps = 151\n",
      "10:45:16 [DEBUG] evaluate generation 3150: reward = -81.93, steps = 148\n",
      "10:45:17 [DEBUG] evaluate generation 3151: reward = -81.96, steps = 145\n",
      "10:45:19 [DEBUG] evaluate generation 3152: reward = -82.63, steps = 145\n",
      "10:45:21 [DEBUG] evaluate generation 3153: reward = -85.66, steps = 138\n",
      "10:45:23 [DEBUG] evaluate generation 3154: reward = -83.33, steps = 148\n",
      "10:45:25 [DEBUG] evaluate generation 3155: reward = -84.43, steps = 140\n",
      "10:45:27 [DEBUG] evaluate generation 3156: reward = -82.40, steps = 147\n",
      "10:45:29 [DEBUG] evaluate generation 3157: reward = -83.05, steps = 148\n",
      "10:45:32 [DEBUG] evaluate generation 3158: reward = -84.06, steps = 145\n",
      "10:45:34 [DEBUG] evaluate generation 3159: reward = -81.70, steps = 146\n",
      "10:45:36 [DEBUG] evaluate generation 3160: reward = -81.36, steps = 152\n",
      "10:45:38 [DEBUG] evaluate generation 3161: reward = -81.53, steps = 151\n",
      "10:45:40 [DEBUG] evaluate generation 3162: reward = -82.71, steps = 154\n",
      "10:45:42 [DEBUG] evaluate generation 3163: reward = -83.09, steps = 146\n",
      "10:45:44 [DEBUG] evaluate generation 3164: reward = -83.66, steps = 144\n",
      "10:45:45 [DEBUG] evaluate generation 3165: reward = -81.90, steps = 151\n",
      "10:45:47 [DEBUG] evaluate generation 3166: reward = -82.01, steps = 154\n",
      "10:45:48 [DEBUG] evaluate generation 3167: reward = -83.05, steps = 142\n",
      "10:45:50 [DEBUG] evaluate generation 3168: reward = -82.78, steps = 142\n",
      "10:45:51 [DEBUG] evaluate generation 3169: reward = -82.84, steps = 144\n",
      "10:45:53 [DEBUG] evaluate generation 3170: reward = -81.16, steps = 148\n",
      "10:45:55 [DEBUG] evaluate generation 3171: reward = -82.44, steps = 149\n",
      "10:45:59 [DEBUG] evaluate generation 3172: reward = -81.06, steps = 156\n",
      "10:46:00 [DEBUG] evaluate generation 3173: reward = -82.03, steps = 146\n",
      "10:46:02 [DEBUG] evaluate generation 3174: reward = -81.68, steps = 147\n",
      "10:46:04 [DEBUG] evaluate generation 3175: reward = -81.97, steps = 146\n",
      "10:46:05 [DEBUG] evaluate generation 3176: reward = -81.14, steps = 155\n",
      "10:46:07 [DEBUG] evaluate generation 3177: reward = -83.57, steps = 138\n",
      "10:46:09 [DEBUG] evaluate generation 3178: reward = -82.33, steps = 154\n",
      "10:46:12 [DEBUG] evaluate generation 3179: reward = -83.42, steps = 145\n",
      "10:46:14 [DEBUG] evaluate generation 3180: reward = -82.79, steps = 150\n",
      "10:46:16 [DEBUG] evaluate generation 3181: reward = -82.72, steps = 149\n",
      "10:46:19 [DEBUG] evaluate generation 3182: reward = -82.78, steps = 146\n",
      "10:46:21 [DEBUG] evaluate generation 3183: reward = -81.93, steps = 156\n",
      "10:46:23 [DEBUG] evaluate generation 3184: reward = -82.51, steps = 145\n",
      "10:46:24 [DEBUG] evaluate generation 3185: reward = -82.12, steps = 144\n",
      "10:46:27 [DEBUG] evaluate generation 3186: reward = -82.88, steps = 144\n",
      "10:46:28 [DEBUG] evaluate generation 3187: reward = -82.70, steps = 148\n",
      "10:46:30 [DEBUG] evaluate generation 3188: reward = -81.11, steps = 152\n",
      "10:46:33 [DEBUG] evaluate generation 3189: reward = -77.07, steps = 165\n",
      "10:46:35 [DEBUG] evaluate generation 3190: reward = -78.62, steps = 168\n",
      "10:46:38 [DEBUG] evaluate generation 3191: reward = -81.66, steps = 149\n",
      "10:46:40 [DEBUG] evaluate generation 3192: reward = -77.52, steps = 162\n",
      "10:46:43 [DEBUG] evaluate generation 3193: reward = -80.53, steps = 157\n",
      "10:46:45 [DEBUG] evaluate generation 3194: reward = -81.45, steps = 149\n",
      "10:46:47 [DEBUG] evaluate generation 3195: reward = -79.71, steps = 165\n",
      "10:46:48 [DEBUG] evaluate generation 3196: reward = -79.31, steps = 164\n",
      "10:46:50 [DEBUG] evaluate generation 3197: reward = -78.81, steps = 168\n",
      "10:46:53 [DEBUG] evaluate generation 3198: reward = -81.88, steps = 158\n",
      "10:46:55 [DEBUG] evaluate generation 3199: reward = -82.93, steps = 153\n",
      "10:46:57 [DEBUG] evaluate generation 3200: reward = -79.44, steps = 163\n",
      "10:47:00 [DEBUG] evaluate generation 3201: reward = -77.73, steps = 168\n",
      "10:47:03 [DEBUG] evaluate generation 3202: reward = -76.93, steps = 174\n",
      "10:47:06 [DEBUG] evaluate generation 3203: reward = -77.56, steps = 175\n",
      "10:47:08 [DEBUG] evaluate generation 3204: reward = -79.15, steps = 166\n",
      "10:47:10 [DEBUG] evaluate generation 3205: reward = -75.15, steps = 181\n",
      "10:47:12 [DEBUG] evaluate generation 3206: reward = -79.09, steps = 166\n",
      "10:47:14 [DEBUG] evaluate generation 3207: reward = -79.57, steps = 158\n",
      "10:47:17 [DEBUG] evaluate generation 3208: reward = -69.07, steps = 254\n",
      "10:47:19 [DEBUG] evaluate generation 3209: reward = -73.37, steps = 196\n",
      "10:47:22 [DEBUG] evaluate generation 3210: reward = -73.84, steps = 216\n",
      "10:47:24 [DEBUG] evaluate generation 3211: reward = -73.48, steps = 221\n",
      "10:47:26 [DEBUG] evaluate generation 3212: reward = -72.50, steps = 201\n",
      "10:47:29 [DEBUG] evaluate generation 3213: reward = -66.19, steps = 274\n",
      "10:47:32 [DEBUG] evaluate generation 3214: reward = -84.69, steps = 143\n",
      "10:47:34 [DEBUG] evaluate generation 3215: reward = -78.44, steps = 164\n",
      "10:47:37 [DEBUG] evaluate generation 3216: reward = -82.07, steps = 154\n",
      "10:47:41 [DEBUG] evaluate generation 3217: reward = -71.18, steps = 201\n",
      "10:47:43 [DEBUG] evaluate generation 3218: reward = -75.29, steps = 208\n",
      "10:47:45 [DEBUG] evaluate generation 3219: reward = -71.26, steps = 217\n",
      "10:47:48 [DEBUG] evaluate generation 3220: reward = -82.98, steps = 159\n",
      "10:47:51 [DEBUG] evaluate generation 3221: reward = -75.79, steps = 173\n",
      "10:47:52 [DEBUG] evaluate generation 3222: reward = -74.13, steps = 203\n",
      "10:47:54 [DEBUG] evaluate generation 3223: reward = -78.87, steps = 160\n",
      "10:47:56 [DEBUG] evaluate generation 3224: reward = -74.73, steps = 231\n",
      "10:47:57 [DEBUG] evaluate generation 3225: reward = -76.09, steps = 182\n",
      "10:47:59 [DEBUG] evaluate generation 3226: reward = -75.58, steps = 191\n",
      "10:48:03 [DEBUG] evaluate generation 3227: reward = -71.23, steps = 215\n",
      "10:48:05 [DEBUG] evaluate generation 3228: reward = -80.72, steps = 154\n",
      "10:48:07 [DEBUG] evaluate generation 3229: reward = -70.29, steps = 221\n",
      "10:48:08 [DEBUG] evaluate generation 3230: reward = -79.30, steps = 156\n",
      "10:48:10 [DEBUG] evaluate generation 3231: reward = -81.07, steps = 145\n",
      "10:48:11 [DEBUG] evaluate generation 3232: reward = -78.77, steps = 153\n",
      "10:48:14 [DEBUG] evaluate generation 3233: reward = -79.82, steps = 152\n",
      "10:48:17 [DEBUG] evaluate generation 3234: reward = -72.37, steps = 202\n",
      "10:48:20 [DEBUG] evaluate generation 3235: reward = -81.69, steps = 147\n",
      "10:48:21 [DEBUG] evaluate generation 3236: reward = -76.55, steps = 169\n",
      "10:48:24 [DEBUG] evaluate generation 3237: reward = -76.08, steps = 177\n",
      "10:48:26 [DEBUG] evaluate generation 3238: reward = -72.59, steps = 216\n",
      "10:48:30 [DEBUG] evaluate generation 3239: reward = -75.66, steps = 178\n",
      "10:48:31 [DEBUG] evaluate generation 3240: reward = -75.66, steps = 189\n",
      "10:48:35 [DEBUG] evaluate generation 3241: reward = -79.76, steps = 171\n",
      "10:48:37 [DEBUG] evaluate generation 3242: reward = -74.91, steps = 197\n",
      "10:48:39 [DEBUG] evaluate generation 3243: reward = -72.14, steps = 242\n",
      "10:48:42 [DEBUG] evaluate generation 3244: reward = -82.27, steps = 156\n",
      "10:48:44 [DEBUG] evaluate generation 3245: reward = -77.73, steps = 173\n",
      "10:48:47 [DEBUG] evaluate generation 3246: reward = -73.74, steps = 198\n",
      "10:48:52 [DEBUG] evaluate generation 3247: reward = -82.86, steps = 171\n",
      "10:48:54 [DEBUG] evaluate generation 3248: reward = -71.13, steps = 227\n",
      "10:48:58 [DEBUG] evaluate generation 3249: reward = -74.21, steps = 214\n",
      "10:49:01 [DEBUG] evaluate generation 3250: reward = -75.20, steps = 185\n",
      "10:49:03 [DEBUG] evaluate generation 3251: reward = -72.36, steps = 218\n",
      "10:49:05 [DEBUG] evaluate generation 3252: reward = -75.54, steps = 209\n",
      "10:49:09 [DEBUG] evaluate generation 3253: reward = -82.54, steps = 153\n",
      "10:49:11 [DEBUG] evaluate generation 3254: reward = -75.10, steps = 178\n",
      "10:49:15 [DEBUG] evaluate generation 3255: reward = -69.02, steps = 229\n",
      "10:49:17 [DEBUG] evaluate generation 3256: reward = -74.14, steps = 183\n",
      "10:49:20 [DEBUG] evaluate generation 3257: reward = -70.37, steps = 225\n",
      "10:49:23 [DEBUG] evaluate generation 3258: reward = -73.57, steps = 186\n",
      "10:49:25 [DEBUG] evaluate generation 3259: reward = -74.73, steps = 178\n",
      "10:49:28 [DEBUG] evaluate generation 3260: reward = -64.99, steps = 257\n",
      "10:49:31 [DEBUG] evaluate generation 3261: reward = -77.21, steps = 172\n",
      "10:49:33 [DEBUG] evaluate generation 3262: reward = -79.30, steps = 168\n",
      "10:49:35 [DEBUG] evaluate generation 3263: reward = -70.54, steps = 225\n",
      "10:49:39 [DEBUG] evaluate generation 3264: reward = -68.85, steps = 224\n",
      "10:49:41 [DEBUG] evaluate generation 3265: reward = -73.56, steps = 205\n",
      "10:49:44 [DEBUG] evaluate generation 3266: reward = -61.72, steps = 321\n",
      "10:49:47 [DEBUG] evaluate generation 3267: reward = -74.65, steps = 183\n",
      "10:49:51 [DEBUG] evaluate generation 3268: reward = -70.90, steps = 220\n",
      "10:49:53 [DEBUG] evaluate generation 3269: reward = -77.47, steps = 177\n",
      "10:49:55 [DEBUG] evaluate generation 3270: reward = -74.28, steps = 181\n",
      "10:49:59 [DEBUG] evaluate generation 3271: reward = -74.53, steps = 192\n",
      "10:50:04 [DEBUG] evaluate generation 3272: reward = -62.98, steps = 260\n",
      "10:50:08 [DEBUG] evaluate generation 3273: reward = -75.12, steps = 185\n",
      "10:50:11 [DEBUG] evaluate generation 3274: reward = -66.73, steps = 221\n",
      "10:50:14 [DEBUG] evaluate generation 3275: reward = -75.85, steps = 177\n",
      "10:50:16 [DEBUG] evaluate generation 3276: reward = -75.34, steps = 179\n",
      "10:50:18 [DEBUG] evaluate generation 3277: reward = -56.04, steps = 285\n",
      "10:50:20 [DEBUG] evaluate generation 3278: reward = -60.14, steps = 269\n",
      "10:50:23 [DEBUG] evaluate generation 3279: reward = -70.79, steps = 202\n",
      "10:50:25 [DEBUG] evaluate generation 3280: reward = -75.89, steps = 202\n",
      "10:50:27 [DEBUG] evaluate generation 3281: reward = -75.27, steps = 184\n",
      "10:50:32 [DEBUG] evaluate generation 3282: reward = -75.38, steps = 176\n",
      "10:50:34 [DEBUG] evaluate generation 3283: reward = -58.61, steps = 288\n",
      "10:50:36 [DEBUG] evaluate generation 3284: reward = -73.98, steps = 182\n",
      "10:50:38 [DEBUG] evaluate generation 3285: reward = -68.78, steps = 217\n",
      "10:50:40 [DEBUG] evaluate generation 3286: reward = -76.01, steps = 199\n",
      "10:50:44 [DEBUG] evaluate generation 3287: reward = -82.97, steps = 152\n",
      "10:50:47 [DEBUG] evaluate generation 3288: reward = -71.73, steps = 192\n",
      "10:50:48 [DEBUG] evaluate generation 3289: reward = -74.02, steps = 192\n",
      "10:50:52 [DEBUG] evaluate generation 3290: reward = -75.64, steps = 181\n",
      "10:50:56 [DEBUG] evaluate generation 3291: reward = -80.51, steps = 181\n",
      "10:50:58 [DEBUG] evaluate generation 3292: reward = -74.04, steps = 199\n",
      "10:51:02 [DEBUG] evaluate generation 3293: reward = -73.55, steps = 191\n",
      "10:51:04 [DEBUG] evaluate generation 3294: reward = -66.69, steps = 220\n",
      "10:51:07 [DEBUG] evaluate generation 3295: reward = -78.97, steps = 162\n",
      "10:51:09 [DEBUG] evaluate generation 3296: reward = -73.28, steps = 184\n",
      "10:51:12 [DEBUG] evaluate generation 3297: reward = -78.12, steps = 195\n",
      "10:51:15 [DEBUG] evaluate generation 3298: reward = -45.44, steps = 368\n",
      "10:51:18 [DEBUG] evaluate generation 3299: reward = -70.09, steps = 201\n",
      "10:51:22 [DEBUG] evaluate generation 3300: reward = -56.00, steps = 314\n",
      "10:51:24 [DEBUG] evaluate generation 3301: reward = -71.94, steps = 190\n",
      "10:51:26 [DEBUG] evaluate generation 3302: reward = -82.87, steps = 151\n",
      "10:51:28 [DEBUG] evaluate generation 3303: reward = -47.02, steps = 357\n",
      "10:51:31 [DEBUG] evaluate generation 3304: reward = -67.71, steps = 232\n",
      "10:51:33 [DEBUG] evaluate generation 3305: reward = -75.77, steps = 182\n",
      "10:51:36 [DEBUG] evaluate generation 3306: reward = -86.85, steps = 144\n",
      "10:51:39 [DEBUG] evaluate generation 3307: reward = -67.79, steps = 214\n",
      "10:51:41 [DEBUG] evaluate generation 3308: reward = -70.08, steps = 247\n",
      "10:51:43 [DEBUG] evaluate generation 3309: reward = -65.49, steps = 230\n",
      "10:51:46 [DEBUG] evaluate generation 3310: reward = -67.69, steps = 267\n",
      "10:51:50 [DEBUG] evaluate generation 3311: reward = -76.82, steps = 169\n",
      "10:51:54 [DEBUG] evaluate generation 3312: reward = -75.98, steps = 181\n",
      "10:51:57 [DEBUG] evaluate generation 3313: reward = -72.13, steps = 236\n",
      "10:52:01 [DEBUG] evaluate generation 3314: reward = -43.50, steps = 394\n",
      "10:52:04 [DEBUG] evaluate generation 3315: reward = -53.14, steps = 347\n",
      "10:52:07 [DEBUG] evaluate generation 3316: reward = -77.73, steps = 184\n",
      "10:52:09 [DEBUG] evaluate generation 3317: reward = -48.53, steps = 333\n",
      "10:52:12 [DEBUG] evaluate generation 3318: reward = -73.41, steps = 205\n",
      "10:52:14 [DEBUG] evaluate generation 3319: reward = -75.02, steps = 185\n",
      "10:52:17 [DEBUG] evaluate generation 3320: reward = -76.57, steps = 193\n",
      "10:52:20 [DEBUG] evaluate generation 3321: reward = -21.59, steps = 508\n",
      "10:52:23 [DEBUG] evaluate generation 3322: reward = -54.85, steps = 282\n",
      "10:52:25 [DEBUG] evaluate generation 3323: reward = -48.61, steps = 412\n",
      "10:52:27 [DEBUG] evaluate generation 3324: reward = -64.24, steps = 250\n",
      "10:52:30 [DEBUG] evaluate generation 3325: reward = -52.03, steps = 384\n",
      "10:52:32 [DEBUG] evaluate generation 3326: reward = -70.70, steps = 208\n",
      "10:52:35 [DEBUG] evaluate generation 3327: reward = -53.14, steps = 274\n",
      "10:52:37 [DEBUG] evaluate generation 3328: reward = -59.66, steps = 267\n",
      "10:52:40 [DEBUG] evaluate generation 3329: reward = -57.20, steps = 259\n",
      "10:52:42 [DEBUG] evaluate generation 3330: reward = -21.16, steps = 536\n",
      "10:52:44 [DEBUG] evaluate generation 3331: reward = -58.95, steps = 235\n",
      "10:52:46 [DEBUG] evaluate generation 3332: reward = -50.80, steps = 284\n",
      "10:52:49 [DEBUG] evaluate generation 3333: reward = -75.03, steps = 195\n",
      "10:52:51 [DEBUG] evaluate generation 3334: reward = -19.35, steps = 570\n",
      "10:52:54 [DEBUG] evaluate generation 3335: reward = -21.47, steps = 493\n",
      "10:52:56 [DEBUG] evaluate generation 3336: reward = -45.30, steps = 376\n",
      "10:52:58 [DEBUG] evaluate generation 3337: reward = -85.11, steps = 131\n",
      "10:53:01 [DEBUG] evaluate generation 3338: reward = -11.57, steps = 514\n",
      "10:53:05 [DEBUG] evaluate generation 3339: reward = -84.84, steps = 131\n",
      "10:53:07 [DEBUG] evaluate generation 3340: reward = -65.95, steps = 217\n",
      "10:53:09 [DEBUG] evaluate generation 3341: reward = -85.19, steps = 128\n",
      "10:53:11 [DEBUG] evaluate generation 3342: reward = -76.59, steps = 187\n",
      "10:53:13 [DEBUG] evaluate generation 3343: reward = -85.71, steps = 130\n",
      "10:53:16 [DEBUG] evaluate generation 3344: reward = 49.53, steps = 929\n",
      "10:53:18 [DEBUG] evaluate generation 3345: reward = -86.50, steps = 127\n",
      "10:53:22 [DEBUG] evaluate generation 3346: reward = -85.94, steps = 129\n",
      "10:53:24 [DEBUG] evaluate generation 3347: reward = -86.24, steps = 130\n",
      "10:53:26 [DEBUG] evaluate generation 3348: reward = -85.74, steps = 128\n",
      "10:53:30 [DEBUG] evaluate generation 3349: reward = -84.86, steps = 128\n",
      "10:53:32 [DEBUG] evaluate generation 3350: reward = -86.21, steps = 130\n",
      "10:53:35 [DEBUG] evaluate generation 3351: reward = -85.76, steps = 129\n",
      "10:53:37 [DEBUG] evaluate generation 3352: reward = -53.68, steps = 295\n",
      "10:53:39 [DEBUG] evaluate generation 3353: reward = -87.28, steps = 128\n",
      "10:53:41 [DEBUG] evaluate generation 3354: reward = -86.40, steps = 128\n",
      "10:53:44 [DEBUG] evaluate generation 3355: reward = -74.79, steps = 160\n",
      "10:53:47 [DEBUG] evaluate generation 3356: reward = -76.35, steps = 184\n",
      "10:53:49 [DEBUG] evaluate generation 3357: reward = -46.09, steps = 311\n",
      "10:53:52 [DEBUG] evaluate generation 3358: reward = -85.09, steps = 130\n",
      "10:53:54 [DEBUG] evaluate generation 3359: reward = -85.69, steps = 131\n",
      "10:53:56 [DEBUG] evaluate generation 3360: reward = -74.92, steps = 195\n",
      "10:53:59 [DEBUG] evaluate generation 3361: reward = -45.29, steps = 398\n",
      "10:54:03 [DEBUG] evaluate generation 3362: reward = -75.17, steps = 193\n",
      "10:54:06 [DEBUG] evaluate generation 3363: reward = 5.61, steps = 679\n",
      "10:54:09 [DEBUG] evaluate generation 3364: reward = -56.49, steps = 317\n",
      "10:54:11 [DEBUG] evaluate generation 3365: reward = -35.57, steps = 432\n",
      "10:54:13 [DEBUG] evaluate generation 3366: reward = -58.74, steps = 253\n",
      "10:54:16 [DEBUG] evaluate generation 3367: reward = -52.89, steps = 299\n",
      "10:54:19 [DEBUG] evaluate generation 3368: reward = -85.32, steps = 133\n",
      "10:54:23 [DEBUG] evaluate generation 3369: reward = -77.22, steps = 200\n",
      "10:54:25 [DEBUG] evaluate generation 3370: reward = -85.37, steps = 132\n",
      "10:54:29 [DEBUG] evaluate generation 3371: reward = -85.94, steps = 131\n",
      "10:54:33 [DEBUG] evaluate generation 3372: reward = -33.72, steps = 379\n",
      "10:54:35 [DEBUG] evaluate generation 3373: reward = -65.72, steps = 220\n",
      "10:54:37 [DEBUG] evaluate generation 3374: reward = 6.46, steps = 770\n",
      "10:54:40 [DEBUG] evaluate generation 3375: reward = -35.19, steps = 381\n",
      "10:54:43 [DEBUG] evaluate generation 3376: reward = -19.49, steps = 497\n",
      "10:54:46 [DEBUG] evaluate generation 3377: reward = -85.79, steps = 129\n",
      "10:54:47 [DEBUG] evaluate generation 3378: reward = -85.47, steps = 126\n",
      "10:54:50 [DEBUG] evaluate generation 3379: reward = -85.85, steps = 129\n",
      "10:54:53 [DEBUG] evaluate generation 3380: reward = -75.05, steps = 176\n",
      "10:54:54 [DEBUG] evaluate generation 3381: reward = -84.76, steps = 130\n",
      "10:54:57 [DEBUG] evaluate generation 3382: reward = -1.85, steps = 590\n",
      "10:55:00 [DEBUG] evaluate generation 3383: reward = -46.12, steps = 315\n",
      "10:55:02 [DEBUG] evaluate generation 3384: reward = -85.69, steps = 130\n",
      "10:55:03 [DEBUG] evaluate generation 3385: reward = -63.55, steps = 234\n",
      "10:55:05 [DEBUG] evaluate generation 3386: reward = -63.94, steps = 219\n",
      "10:55:08 [DEBUG] evaluate generation 3387: reward = -67.26, steps = 191\n",
      "10:55:10 [DEBUG] evaluate generation 3388: reward = -53.69, steps = 276\n",
      "10:55:13 [DEBUG] evaluate generation 3389: reward = -66.46, steps = 216\n",
      "10:55:15 [DEBUG] evaluate generation 3390: reward = -86.36, steps = 128\n",
      "10:55:17 [DEBUG] evaluate generation 3391: reward = -82.35, steps = 134\n",
      "10:55:20 [DEBUG] evaluate generation 3392: reward = -77.79, steps = 143\n",
      "10:55:22 [DEBUG] evaluate generation 3393: reward = -7.72, steps = 512\n",
      "10:55:25 [DEBUG] evaluate generation 3394: reward = -61.56, steps = 249\n",
      "10:55:28 [DEBUG] evaluate generation 3395: reward = -86.69, steps = 128\n",
      "10:55:32 [DEBUG] evaluate generation 3396: reward = -26.42, steps = 432\n",
      "10:55:34 [DEBUG] evaluate generation 3397: reward = -87.86, steps = 127\n",
      "10:55:37 [DEBUG] evaluate generation 3398: reward = -50.69, steps = 268\n",
      "10:55:39 [DEBUG] evaluate generation 3399: reward = -26.80, steps = 425\n",
      "10:55:43 [DEBUG] evaluate generation 3400: reward = -68.46, steps = 197\n",
      "10:55:46 [DEBUG] evaluate generation 3401: reward = -80.19, steps = 141\n",
      "10:55:48 [DEBUG] evaluate generation 3402: reward = -85.13, steps = 129\n",
      "10:55:51 [DEBUG] evaluate generation 3403: reward = -85.00, steps = 137\n",
      "10:55:54 [DEBUG] evaluate generation 3404: reward = -84.96, steps = 133\n",
      "10:55:55 [DEBUG] evaluate generation 3405: reward = -85.48, steps = 129\n",
      "10:55:58 [DEBUG] evaluate generation 3406: reward = -85.70, steps = 128\n",
      "10:56:01 [DEBUG] evaluate generation 3407: reward = -3.12, steps = 565\n",
      "10:56:05 [DEBUG] evaluate generation 3408: reward = -84.78, steps = 132\n",
      "10:56:07 [DEBUG] evaluate generation 3409: reward = -76.45, steps = 158\n",
      "10:56:09 [DEBUG] evaluate generation 3410: reward = -84.95, steps = 130\n",
      "10:56:11 [DEBUG] evaluate generation 3411: reward = -48.47, steps = 359\n",
      "10:56:13 [DEBUG] evaluate generation 3412: reward = -76.90, steps = 147\n",
      "10:56:16 [DEBUG] evaluate generation 3413: reward = -86.01, steps = 130\n",
      "10:56:19 [DEBUG] evaluate generation 3414: reward = -87.49, steps = 127\n",
      "10:56:22 [DEBUG] evaluate generation 3415: reward = -85.75, steps = 151\n",
      "10:56:24 [DEBUG] evaluate generation 3416: reward = -53.32, steps = 311\n",
      "10:56:27 [DEBUG] evaluate generation 3417: reward = -67.44, steps = 208\n",
      "10:56:29 [DEBUG] evaluate generation 3418: reward = -86.86, steps = 127\n",
      "10:56:32 [DEBUG] evaluate generation 3419: reward = -86.89, steps = 128\n",
      "10:56:36 [DEBUG] evaluate generation 3420: reward = -68.38, steps = 223\n",
      "10:56:41 [DEBUG] evaluate generation 3421: reward = -86.47, steps = 124\n",
      "10:56:44 [DEBUG] evaluate generation 3422: reward = -86.41, steps = 129\n",
      "10:56:47 [DEBUG] evaluate generation 3423: reward = -64.88, steps = 215\n",
      "10:56:50 [DEBUG] evaluate generation 3424: reward = -65.01, steps = 239\n",
      "10:56:54 [DEBUG] evaluate generation 3425: reward = -87.02, steps = 124\n",
      "10:56:56 [DEBUG] evaluate generation 3426: reward = -87.24, steps = 123\n",
      "10:56:58 [DEBUG] evaluate generation 3427: reward = -84.86, steps = 124\n",
      "10:57:02 [DEBUG] evaluate generation 3428: reward = -65.26, steps = 251\n",
      "10:57:05 [DEBUG] evaluate generation 3429: reward = -85.10, steps = 128\n",
      "10:57:07 [DEBUG] evaluate generation 3430: reward = -68.07, steps = 236\n",
      "10:57:10 [DEBUG] evaluate generation 3431: reward = -60.51, steps = 243\n",
      "10:57:13 [DEBUG] evaluate generation 3432: reward = -88.04, steps = 123\n",
      "10:57:15 [DEBUG] evaluate generation 3433: reward = -75.43, steps = 189\n",
      "10:57:18 [DEBUG] evaluate generation 3434: reward = -85.22, steps = 126\n",
      "10:57:20 [DEBUG] evaluate generation 3435: reward = -29.78, steps = 447\n",
      "10:57:24 [DEBUG] evaluate generation 3436: reward = -61.59, steps = 216\n",
      "10:57:27 [DEBUG] evaluate generation 3437: reward = -12.42, steps = 577\n",
      "10:57:31 [DEBUG] evaluate generation 3438: reward = -67.45, steps = 250\n",
      "10:57:34 [DEBUG] evaluate generation 3439: reward = -39.87, steps = 403\n",
      "10:57:37 [DEBUG] evaluate generation 3440: reward = 30.42, steps = 822\n",
      "10:57:41 [DEBUG] evaluate generation 3441: reward = -48.56, steps = 372\n",
      "10:57:45 [DEBUG] evaluate generation 3442: reward = -47.92, steps = 366\n",
      "10:57:48 [DEBUG] evaluate generation 3443: reward = -62.17, steps = 214\n",
      "10:57:51 [DEBUG] evaluate generation 3444: reward = -41.90, steps = 325\n",
      "10:57:55 [DEBUG] evaluate generation 3445: reward = -65.99, steps = 220\n",
      "10:57:58 [DEBUG] evaluate generation 3446: reward = -12.53, steps = 561\n",
      "10:58:01 [DEBUG] evaluate generation 3447: reward = -66.01, steps = 214\n",
      "10:58:05 [DEBUG] evaluate generation 3448: reward = -69.30, steps = 242\n",
      "10:58:07 [DEBUG] evaluate generation 3449: reward = -85.60, steps = 125\n",
      "10:58:11 [DEBUG] evaluate generation 3450: reward = -87.07, steps = 123\n",
      "10:58:13 [DEBUG] evaluate generation 3451: reward = -22.75, steps = 550\n",
      "10:58:16 [DEBUG] evaluate generation 3452: reward = -87.14, steps = 123\n",
      "10:58:19 [DEBUG] evaluate generation 3453: reward = -85.80, steps = 124\n",
      "10:58:23 [DEBUG] evaluate generation 3454: reward = -86.85, steps = 125\n",
      "10:58:26 [DEBUG] evaluate generation 3455: reward = -84.75, steps = 126\n",
      "10:58:29 [DEBUG] evaluate generation 3456: reward = -66.01, steps = 232\n",
      "10:58:33 [DEBUG] evaluate generation 3457: reward = -12.61, steps = 609\n",
      "10:58:35 [DEBUG] evaluate generation 3458: reward = -31.09, steps = 481\n",
      "10:58:39 [DEBUG] evaluate generation 3459: reward = -34.17, steps = 376\n",
      "10:58:42 [DEBUG] evaluate generation 3460: reward = 2.30, steps = 697\n",
      "10:58:45 [DEBUG] evaluate generation 3461: reward = -13.56, steps = 567\n",
      "10:58:48 [DEBUG] evaluate generation 3462: reward = -25.53, steps = 631\n",
      "10:58:51 [DEBUG] evaluate generation 3463: reward = -68.75, steps = 261\n",
      "10:58:53 [DEBUG] evaluate generation 3464: reward = -78.66, steps = 194\n",
      "10:58:55 [DEBUG] evaluate generation 3465: reward = -88.04, steps = 125\n",
      "10:58:58 [DEBUG] evaluate generation 3466: reward = -85.54, steps = 127\n",
      "10:59:01 [DEBUG] evaluate generation 3467: reward = -8.00, steps = 694\n",
      "10:59:06 [DEBUG] evaluate generation 3468: reward = -18.57, steps = 754\n",
      "10:59:10 [DEBUG] evaluate generation 3469: reward = -87.60, steps = 125\n",
      "10:59:14 [DEBUG] evaluate generation 3470: reward = -77.17, steps = 193\n",
      "10:59:21 [DEBUG] evaluate generation 3471: reward = -0.95, steps = 852\n",
      "10:59:26 [DEBUG] evaluate generation 3472: reward = -54.93, steps = 381\n",
      "10:59:30 [DEBUG] evaluate generation 3473: reward = 75.52, steps = 1430\n",
      "10:59:32 [DEBUG] evaluate generation 3474: reward = -70.51, steps = 262\n",
      "10:59:35 [DEBUG] evaluate generation 3475: reward = -79.10, steps = 188\n",
      "10:59:38 [DEBUG] evaluate generation 3476: reward = -85.02, steps = 124\n",
      "10:59:42 [DEBUG] evaluate generation 3477: reward = -77.51, steps = 188\n",
      "10:59:45 [DEBUG] evaluate generation 3478: reward = -23.55, steps = 499\n",
      "10:59:48 [DEBUG] evaluate generation 3479: reward = -50.49, steps = 298\n",
      "10:59:52 [DEBUG] evaluate generation 3480: reward = 114.47, steps = 1399\n",
      "10:59:55 [DEBUG] evaluate generation 3481: reward = -86.11, steps = 127\n",
      "10:59:58 [DEBUG] evaluate generation 3482: reward = -76.78, steps = 175\n",
      "11:00:01 [DEBUG] evaluate generation 3483: reward = -25.26, steps = 542\n",
      "11:00:03 [DEBUG] evaluate generation 3484: reward = -64.28, steps = 231\n",
      "11:00:06 [DEBUG] evaluate generation 3485: reward = -86.14, steps = 123\n",
      "11:00:09 [DEBUG] evaluate generation 3486: reward = -26.98, steps = 501\n",
      "11:00:12 [DEBUG] evaluate generation 3487: reward = -87.36, steps = 125\n",
      "11:00:15 [DEBUG] evaluate generation 3488: reward = -87.92, steps = 125\n",
      "11:00:19 [DEBUG] evaluate generation 3489: reward = -79.52, steps = 186\n",
      "11:00:23 [DEBUG] evaluate generation 3490: reward = -3.35, steps = 717\n",
      "11:00:27 [DEBUG] evaluate generation 3491: reward = -58.96, steps = 384\n",
      "11:00:30 [DEBUG] evaluate generation 3492: reward = -87.36, steps = 125\n",
      "11:00:34 [DEBUG] evaluate generation 3493: reward = -72.13, steps = 262\n",
      "11:00:38 [DEBUG] evaluate generation 3494: reward = -87.59, steps = 124\n",
      "11:00:42 [DEBUG] evaluate generation 3495: reward = -60.24, steps = 389\n",
      "11:00:44 [DEBUG] evaluate generation 3496: reward = -67.18, steps = 301\n",
      "11:00:49 [DEBUG] evaluate generation 3497: reward = -87.29, steps = 126\n",
      "11:00:53 [DEBUG] evaluate generation 3498: reward = -23.27, steps = 557\n",
      "11:00:57 [DEBUG] evaluate generation 3499: reward = -18.19, steps = 636\n",
      "11:01:00 [DEBUG] evaluate generation 3500: reward = -86.68, steps = 126\n",
      "11:01:06 [DEBUG] evaluate generation 3501: reward = -70.62, steps = 240\n",
      "11:01:11 [DEBUG] evaluate generation 3502: reward = -86.63, steps = 128\n",
      "11:01:13 [DEBUG] evaluate generation 3503: reward = -71.64, steps = 238\n",
      "11:01:17 [DEBUG] evaluate generation 3504: reward = 29.52, steps = 1008\n",
      "11:01:23 [DEBUG] evaluate generation 3505: reward = -47.17, steps = 344\n",
      "11:01:27 [DEBUG] evaluate generation 3506: reward = -61.70, steps = 318\n",
      "11:01:31 [DEBUG] evaluate generation 3507: reward = -63.36, steps = 226\n",
      "11:01:35 [DEBUG] evaluate generation 3508: reward = -70.48, steps = 244\n",
      "11:01:39 [DEBUG] evaluate generation 3509: reward = -42.56, steps = 515\n",
      "11:01:42 [DEBUG] evaluate generation 3510: reward = -75.19, steps = 178\n",
      "11:01:47 [DEBUG] evaluate generation 3511: reward = -18.12, steps = 681\n",
      "11:01:50 [DEBUG] evaluate generation 3512: reward = -86.36, steps = 126\n",
      "11:01:54 [DEBUG] evaluate generation 3513: reward = -37.06, steps = 488\n",
      "11:01:57 [DEBUG] evaluate generation 3514: reward = -49.27, steps = 341\n",
      "11:02:00 [DEBUG] evaluate generation 3515: reward = -86.49, steps = 130\n",
      "11:02:05 [DEBUG] evaluate generation 3516: reward = -87.29, steps = 125\n",
      "11:02:09 [DEBUG] evaluate generation 3517: reward = -61.32, steps = 264\n",
      "11:02:13 [DEBUG] evaluate generation 3518: reward = 14.81, steps = 954\n",
      "11:02:19 [DEBUG] evaluate generation 3519: reward = -55.83, steps = 377\n",
      "11:02:22 [DEBUG] evaluate generation 3520: reward = -58.48, steps = 282\n",
      "11:02:24 [DEBUG] evaluate generation 3521: reward = -57.27, steps = 234\n",
      "11:02:30 [DEBUG] evaluate generation 3522: reward = -56.07, steps = 309\n",
      "11:02:32 [DEBUG] evaluate generation 3523: reward = -87.06, steps = 123\n",
      "11:02:36 [DEBUG] evaluate generation 3524: reward = 18.70, steps = 852\n",
      "11:02:40 [DEBUG] evaluate generation 3525: reward = 22.15, steps = 901\n",
      "11:02:43 [DEBUG] evaluate generation 3526: reward = -87.60, steps = 123\n",
      "11:02:48 [DEBUG] evaluate generation 3527: reward = -87.15, steps = 126\n",
      "11:02:50 [DEBUG] evaluate generation 3528: reward = -42.24, steps = 466\n",
      "11:02:54 [DEBUG] evaluate generation 3529: reward = -6.33, steps = 681\n",
      "11:02:58 [DEBUG] evaluate generation 3530: reward = -86.72, steps = 125\n",
      "11:03:02 [DEBUG] evaluate generation 3531: reward = -14.43, steps = 618\n",
      "11:03:04 [DEBUG] evaluate generation 3532: reward = -79.41, steps = 188\n",
      "11:03:08 [DEBUG] evaluate generation 3533: reward = -87.32, steps = 125\n",
      "11:03:11 [DEBUG] evaluate generation 3534: reward = -56.75, steps = 299\n",
      "11:03:16 [DEBUG] evaluate generation 3535: reward = 6.86, steps = 903\n",
      "11:03:19 [DEBUG] evaluate generation 3536: reward = -33.59, steps = 398\n",
      "11:03:23 [DEBUG] evaluate generation 3537: reward = -59.97, steps = 359\n",
      "11:03:30 [DEBUG] evaluate generation 3538: reward = 185.89, steps = 1600\n",
      "11:03:35 [DEBUG] evaluate generation 3539: reward = -56.46, steps = 265\n",
      "11:03:37 [DEBUG] evaluate generation 3540: reward = -55.86, steps = 281\n",
      "11:03:40 [DEBUG] evaluate generation 3541: reward = -65.36, steps = 301\n",
      "11:03:44 [DEBUG] evaluate generation 3542: reward = -25.84, steps = 514\n",
      "11:03:47 [DEBUG] evaluate generation 3543: reward = -48.58, steps = 361\n",
      "11:03:51 [DEBUG] evaluate generation 3544: reward = -56.34, steps = 366\n",
      "11:03:54 [DEBUG] evaluate generation 3545: reward = -86.97, steps = 122\n",
      "11:03:57 [DEBUG] evaluate generation 3546: reward = -73.76, steps = 180\n",
      "11:04:00 [DEBUG] evaluate generation 3547: reward = -86.80, steps = 125\n",
      "11:04:05 [DEBUG] evaluate generation 3548: reward = -49.77, steps = 345\n",
      "11:04:08 [DEBUG] evaluate generation 3549: reward = -56.57, steps = 239\n",
      "11:04:11 [DEBUG] evaluate generation 3550: reward = -60.70, steps = 263\n",
      "11:04:15 [DEBUG] evaluate generation 3551: reward = 10.95, steps = 737\n",
      "11:04:19 [DEBUG] evaluate generation 3552: reward = -77.07, steps = 179\n",
      "11:04:24 [DEBUG] evaluate generation 3553: reward = -61.67, steps = 299\n",
      "11:04:26 [DEBUG] evaluate generation 3554: reward = -67.58, steps = 269\n",
      "11:04:30 [DEBUG] evaluate generation 3555: reward = -77.54, steps = 180\n",
      "11:04:34 [DEBUG] evaluate generation 3556: reward = -63.81, steps = 282\n",
      "11:04:37 [DEBUG] evaluate generation 3557: reward = -36.27, steps = 482\n",
      "11:04:41 [DEBUG] evaluate generation 3558: reward = -78.75, steps = 190\n",
      "11:04:45 [DEBUG] evaluate generation 3559: reward = -69.47, steps = 187\n",
      "11:04:48 [DEBUG] evaluate generation 3560: reward = -67.22, steps = 260\n",
      "11:04:53 [DEBUG] evaluate generation 3561: reward = 40.45, steps = 1044\n",
      "11:04:56 [DEBUG] evaluate generation 3562: reward = 206.37, steps = 1600\n",
      "11:05:01 [DEBUG] evaluate generation 3563: reward = -18.60, steps = 510\n",
      "11:05:06 [DEBUG] evaluate generation 3564: reward = 12.17, steps = 834\n",
      "11:05:09 [DEBUG] evaluate generation 3565: reward = -64.57, steps = 303\n",
      "11:05:13 [DEBUG] evaluate generation 3566: reward = -78.88, steps = 183\n",
      "11:05:16 [DEBUG] evaluate generation 3567: reward = -64.23, steps = 306\n",
      "11:05:20 [DEBUG] evaluate generation 3568: reward = -53.58, steps = 356\n",
      "11:05:27 [DEBUG] evaluate generation 3569: reward = -77.12, steps = 184\n",
      "11:05:32 [DEBUG] evaluate generation 3570: reward = 205.13, steps = 1600\n",
      "11:05:39 [DEBUG] evaluate generation 3571: reward = -35.86, steps = 494\n",
      "11:05:41 [DEBUG] evaluate generation 3572: reward = -69.96, steps = 186\n",
      "11:05:46 [DEBUG] evaluate generation 3573: reward = 39.56, steps = 1172\n",
      "11:05:50 [DEBUG] evaluate generation 3574: reward = -78.69, steps = 181\n",
      "11:05:54 [DEBUG] evaluate generation 3575: reward = 5.15, steps = 756\n",
      "11:05:58 [DEBUG] evaluate generation 3576: reward = 1.84, steps = 561\n",
      "11:06:02 [DEBUG] evaluate generation 3577: reward = -34.85, steps = 428\n",
      "11:06:06 [DEBUG] evaluate generation 3578: reward = -38.58, steps = 415\n",
      "11:06:09 [DEBUG] evaluate generation 3579: reward = -63.45, steps = 305\n",
      "11:06:12 [DEBUG] evaluate generation 3580: reward = -56.11, steps = 360\n",
      "11:06:16 [DEBUG] evaluate generation 3581: reward = -64.18, steps = 284\n",
      "11:06:20 [DEBUG] evaluate generation 3582: reward = 56.38, steps = 1076\n",
      "11:06:24 [DEBUG] evaluate generation 3583: reward = -79.45, steps = 174\n",
      "11:06:28 [DEBUG] evaluate generation 3584: reward = 36.41, steps = 965\n",
      "11:06:33 [DEBUG] evaluate generation 3585: reward = 37.57, steps = 982\n",
      "11:06:39 [DEBUG] evaluate generation 3586: reward = -64.06, steps = 305\n",
      "11:06:44 [DEBUG] evaluate generation 3587: reward = 45.36, steps = 1000\n",
      "11:06:47 [DEBUG] evaluate generation 3588: reward = -39.80, steps = 521\n",
      "11:06:52 [DEBUG] evaluate generation 3589: reward = 28.06, steps = 1219\n",
      "11:06:57 [DEBUG] evaluate generation 3590: reward = -28.94, steps = 588\n",
      "11:07:02 [DEBUG] evaluate generation 3591: reward = 24.65, steps = 939\n",
      "11:07:08 [DEBUG] evaluate generation 3592: reward = 21.33, steps = 864\n",
      "11:07:13 [DEBUG] evaluate generation 3593: reward = 210.99, steps = 1600\n",
      "11:07:18 [DEBUG] evaluate generation 3594: reward = -46.45, steps = 421\n",
      "11:07:22 [DEBUG] evaluate generation 3595: reward = 99.82, steps = 1448\n",
      "11:07:30 [DEBUG] evaluate generation 3596: reward = 78.01, steps = 1374\n",
      "11:07:34 [DEBUG] evaluate generation 3597: reward = 98.91, steps = 1560\n",
      "11:07:38 [DEBUG] evaluate generation 3598: reward = -56.79, steps = 355\n",
      "11:07:43 [DEBUG] evaluate generation 3599: reward = 216.57, steps = 1600\n",
      "11:07:47 [DEBUG] evaluate generation 3600: reward = -60.88, steps = 220\n",
      "11:07:50 [DEBUG] evaluate generation 3601: reward = 45.32, steps = 1070\n",
      "11:07:56 [DEBUG] evaluate generation 3602: reward = 57.83, steps = 1165\n",
      "11:08:00 [DEBUG] evaluate generation 3603: reward = -33.00, steps = 520\n",
      "11:08:04 [DEBUG] evaluate generation 3604: reward = 207.19, steps = 1600\n",
      "11:08:07 [DEBUG] evaluate generation 3605: reward = -43.69, steps = 455\n",
      "11:08:12 [DEBUG] evaluate generation 3606: reward = 7.64, steps = 798\n",
      "11:08:17 [DEBUG] evaluate generation 3607: reward = 205.76, steps = 1600\n",
      "11:08:23 [DEBUG] evaluate generation 3608: reward = -63.51, steps = 286\n",
      "11:08:28 [DEBUG] evaluate generation 3609: reward = -86.92, steps = 129\n",
      "11:08:33 [DEBUG] evaluate generation 3610: reward = -20.10, steps = 600\n",
      "11:08:38 [DEBUG] evaluate generation 3611: reward = 6.84, steps = 880\n",
      "11:08:42 [DEBUG] evaluate generation 3612: reward = -65.97, steps = 248\n",
      "11:08:49 [DEBUG] evaluate generation 3613: reward = -46.32, steps = 378\n",
      "11:08:55 [DEBUG] evaluate generation 3614: reward = 208.95, steps = 1600\n",
      "11:08:59 [DEBUG] evaluate generation 3615: reward = -28.07, steps = 493\n",
      "11:09:04 [DEBUG] evaluate generation 3616: reward = 84.42, steps = 1249\n",
      "11:09:09 [DEBUG] evaluate generation 3617: reward = -34.44, steps = 395\n",
      "11:09:13 [DEBUG] evaluate generation 3618: reward = -51.74, steps = 284\n",
      "11:09:17 [DEBUG] evaluate generation 3619: reward = 35.67, steps = 942\n",
      "11:09:22 [DEBUG] evaluate generation 3620: reward = -53.18, steps = 343\n",
      "11:09:26 [DEBUG] evaluate generation 3621: reward = 93.17, steps = 1352\n",
      "11:09:30 [DEBUG] evaluate generation 3622: reward = -56.82, steps = 282\n",
      "11:09:35 [DEBUG] evaluate generation 3623: reward = -25.92, steps = 576\n",
      "11:09:40 [DEBUG] evaluate generation 3624: reward = 209.53, steps = 1600\n",
      "11:09:45 [DEBUG] evaluate generation 3625: reward = -17.63, steps = 626\n",
      "11:09:49 [DEBUG] evaluate generation 3626: reward = 24.81, steps = 921\n",
      "11:09:56 [DEBUG] evaluate generation 3627: reward = 211.97, steps = 1600\n",
      "11:10:01 [DEBUG] evaluate generation 3628: reward = -40.22, steps = 449\n",
      "11:10:06 [DEBUG] evaluate generation 3629: reward = -41.83, steps = 459\n",
      "11:10:11 [DEBUG] evaluate generation 3630: reward = -26.50, steps = 551\n",
      "11:10:15 [DEBUG] evaluate generation 3631: reward = -73.89, steps = 232\n",
      "11:10:20 [DEBUG] evaluate generation 3632: reward = 48.25, steps = 1124\n",
      "11:10:25 [DEBUG] evaluate generation 3633: reward = -41.90, steps = 511\n",
      "11:10:30 [DEBUG] evaluate generation 3634: reward = -50.61, steps = 382\n",
      "11:10:37 [DEBUG] evaluate generation 3635: reward = 6.40, steps = 753\n",
      "11:10:42 [DEBUG] evaluate generation 3636: reward = -13.61, steps = 610\n",
      "11:10:47 [DEBUG] evaluate generation 3637: reward = -71.22, steps = 238\n",
      "11:10:53 [DEBUG] evaluate generation 3638: reward = -42.39, steps = 484\n",
      "11:11:00 [DEBUG] evaluate generation 3639: reward = 198.54, steps = 1600\n",
      "11:11:04 [DEBUG] evaluate generation 3640: reward = -87.94, steps = 119\n",
      "11:11:11 [DEBUG] evaluate generation 3641: reward = -5.31, steps = 752\n",
      "11:11:16 [DEBUG] evaluate generation 3642: reward = -53.07, steps = 386\n",
      "11:11:20 [DEBUG] evaluate generation 3643: reward = -42.79, steps = 449\n",
      "11:11:25 [DEBUG] evaluate generation 3644: reward = 211.73, steps = 1600\n",
      "11:11:29 [DEBUG] evaluate generation 3645: reward = -51.57, steps = 387\n",
      "11:11:33 [DEBUG] evaluate generation 3646: reward = -10.86, steps = 578\n",
      "11:11:39 [DEBUG] evaluate generation 3647: reward = 16.48, steps = 823\n",
      "11:11:43 [DEBUG] evaluate generation 3648: reward = -80.93, steps = 170\n",
      "11:11:47 [DEBUG] evaluate generation 3649: reward = 56.13, steps = 1177\n",
      "11:11:52 [DEBUG] evaluate generation 3650: reward = -70.64, steps = 242\n",
      "11:11:57 [DEBUG] evaluate generation 3651: reward = 61.83, steps = 1168\n",
      "11:12:01 [DEBUG] evaluate generation 3652: reward = 27.56, steps = 963\n",
      "11:12:05 [DEBUG] evaluate generation 3653: reward = 42.31, steps = 1051\n",
      "11:12:09 [DEBUG] evaluate generation 3654: reward = -79.75, steps = 174\n",
      "11:12:13 [DEBUG] evaluate generation 3655: reward = -86.66, steps = 126\n",
      "11:12:17 [DEBUG] evaluate generation 3656: reward = -13.33, steps = 644\n",
      "11:12:21 [DEBUG] evaluate generation 3657: reward = -22.52, steps = 613\n",
      "11:12:25 [DEBUG] evaluate generation 3658: reward = -25.01, steps = 566\n",
      "11:12:33 [DEBUG] evaluate generation 3659: reward = 53.89, steps = 1250\n",
      "11:12:40 [DEBUG] evaluate generation 3660: reward = -66.53, steps = 294\n",
      "11:12:47 [DEBUG] evaluate generation 3661: reward = -45.19, steps = 463\n",
      "11:12:53 [DEBUG] evaluate generation 3662: reward = -110.97, steps = 241\n",
      "11:12:57 [DEBUG] evaluate generation 3663: reward = -30.95, steps = 512\n",
      "11:13:05 [DEBUG] evaluate generation 3664: reward = 31.41, steps = 999\n",
      "11:13:10 [DEBUG] evaluate generation 3665: reward = -70.92, steps = 244\n",
      "11:13:15 [DEBUG] evaluate generation 3666: reward = -49.24, steps = 420\n",
      "11:13:20 [DEBUG] evaluate generation 3667: reward = -79.44, steps = 163\n",
      "11:13:26 [DEBUG] evaluate generation 3668: reward = -58.22, steps = 346\n",
      "11:13:30 [DEBUG] evaluate generation 3669: reward = 18.05, steps = 977\n",
      "11:13:35 [DEBUG] evaluate generation 3670: reward = -57.76, steps = 356\n",
      "11:13:40 [DEBUG] evaluate generation 3671: reward = 38.46, steps = 1074\n",
      "11:13:45 [DEBUG] evaluate generation 3672: reward = -64.95, steps = 313\n",
      "11:13:49 [DEBUG] evaluate generation 3673: reward = -12.70, steps = 684\n",
      "11:13:53 [DEBUG] evaluate generation 3674: reward = 10.57, steps = 850\n",
      "11:13:59 [DEBUG] evaluate generation 3675: reward = 6.70, steps = 833\n",
      "11:14:06 [DEBUG] evaluate generation 3676: reward = -50.33, steps = 406\n",
      "11:14:11 [DEBUG] evaluate generation 3677: reward = -78.46, steps = 139\n",
      "11:14:16 [DEBUG] evaluate generation 3678: reward = 70.60, steps = 1188\n",
      "11:14:21 [DEBUG] evaluate generation 3679: reward = 84.34, steps = 1227\n",
      "11:14:29 [DEBUG] evaluate generation 3680: reward = 46.12, steps = 1044\n",
      "11:14:33 [DEBUG] evaluate generation 3681: reward = -8.35, steps = 616\n",
      "11:14:39 [DEBUG] evaluate generation 3682: reward = 69.90, steps = 1091\n",
      "11:14:43 [DEBUG] evaluate generation 3683: reward = -41.49, steps = 393\n",
      "11:14:48 [DEBUG] evaluate generation 3684: reward = -61.56, steps = 239\n",
      "11:14:54 [DEBUG] evaluate generation 3685: reward = -13.83, steps = 460\n",
      "11:14:58 [DEBUG] evaluate generation 3686: reward = 79.96, steps = 1301\n",
      "11:15:05 [DEBUG] evaluate generation 3687: reward = -22.60, steps = 484\n",
      "11:15:11 [DEBUG] evaluate generation 3688: reward = 216.40, steps = 1600\n",
      "11:15:17 [DEBUG] evaluate generation 3689: reward = -75.68, steps = 189\n",
      "11:15:21 [DEBUG] evaluate generation 3690: reward = 44.23, steps = 983\n",
      "11:15:27 [DEBUG] evaluate generation 3691: reward = 67.47, steps = 1196\n",
      "11:15:32 [DEBUG] evaluate generation 3692: reward = -69.15, steps = 258\n",
      "11:15:37 [DEBUG] evaluate generation 3693: reward = -85.58, steps = 125\n",
      "11:15:41 [DEBUG] evaluate generation 3694: reward = -88.43, steps = 124\n",
      "11:15:46 [DEBUG] evaluate generation 3695: reward = 195.64, steps = 1600\n",
      "11:15:50 [DEBUG] evaluate generation 3696: reward = 194.60, steps = 1600\n",
      "11:15:58 [DEBUG] evaluate generation 3697: reward = 43.12, steps = 1119\n",
      "11:16:03 [DEBUG] evaluate generation 3698: reward = -64.19, steps = 208\n",
      "11:16:10 [DEBUG] evaluate generation 3699: reward = -64.91, steps = 284\n",
      "11:16:16 [DEBUG] evaluate generation 3700: reward = -41.00, steps = 434\n",
      "11:16:23 [DEBUG] evaluate generation 3701: reward = 74.34, steps = 1372\n",
      "11:16:27 [DEBUG] evaluate generation 3702: reward = -63.95, steps = 241\n",
      "11:16:33 [DEBUG] evaluate generation 3703: reward = -69.63, steps = 223\n",
      "11:16:37 [DEBUG] evaluate generation 3704: reward = 209.25, steps = 1600\n",
      "11:16:40 [DEBUG] evaluate generation 3705: reward = -60.23, steps = 224\n",
      "11:16:46 [DEBUG] evaluate generation 3706: reward = -29.70, steps = 571\n",
      "11:16:52 [DEBUG] evaluate generation 3707: reward = 20.03, steps = 863\n",
      "11:16:57 [DEBUG] evaluate generation 3708: reward = -72.38, steps = 237\n",
      "11:17:01 [DEBUG] evaluate generation 3709: reward = -87.40, steps = 127\n",
      "11:17:06 [DEBUG] evaluate generation 3710: reward = 55.32, steps = 1263\n",
      "11:17:10 [DEBUG] evaluate generation 3711: reward = -88.33, steps = 124\n",
      "11:17:15 [DEBUG] evaluate generation 3712: reward = -80.84, steps = 186\n",
      "11:17:20 [DEBUG] evaluate generation 3713: reward = -65.99, steps = 281\n",
      "11:17:26 [DEBUG] evaluate generation 3714: reward = 16.27, steps = 957\n",
      "11:17:32 [DEBUG] evaluate generation 3715: reward = -81.03, steps = 186\n",
      "11:17:41 [DEBUG] evaluate generation 3716: reward = 64.28, steps = 1495\n",
      "11:17:46 [DEBUG] evaluate generation 3717: reward = 1.24, steps = 885\n",
      "11:17:53 [DEBUG] evaluate generation 3718: reward = -51.25, steps = 458\n",
      "11:17:59 [DEBUG] evaluate generation 3719: reward = -71.71, steps = 241\n",
      "11:18:04 [DEBUG] evaluate generation 3720: reward = -31.65, steps = 575\n",
      "11:18:09 [DEBUG] evaluate generation 3721: reward = 39.19, steps = 1343\n",
      "11:18:16 [DEBUG] evaluate generation 3722: reward = -26.90, steps = 676\n",
      "11:18:21 [DEBUG] evaluate generation 3723: reward = -75.31, steps = 233\n",
      "11:18:26 [DEBUG] evaluate generation 3724: reward = -24.00, steps = 686\n",
      "11:18:33 [DEBUG] evaluate generation 3725: reward = -39.39, steps = 789\n",
      "11:18:39 [DEBUG] evaluate generation 3726: reward = -80.77, steps = 180\n",
      "11:18:45 [DEBUG] evaluate generation 3727: reward = -76.44, steps = 255\n",
      "11:18:53 [DEBUG] evaluate generation 3728: reward = 23.13, steps = 1314\n",
      "11:18:58 [DEBUG] evaluate generation 3729: reward = -87.34, steps = 132\n",
      "11:19:07 [DEBUG] evaluate generation 3730: reward = 179.01, steps = 1600\n",
      "11:19:12 [DEBUG] evaluate generation 3731: reward = -42.69, steps = 489\n",
      "11:19:18 [DEBUG] evaluate generation 3732: reward = 190.33, steps = 1600\n",
      "11:19:24 [DEBUG] evaluate generation 3733: reward = -18.90, steps = 725\n",
      "11:19:29 [DEBUG] evaluate generation 3734: reward = 0.70, steps = 865\n",
      "11:19:35 [DEBUG] evaluate generation 3735: reward = -74.55, steps = 230\n",
      "11:19:41 [DEBUG] evaluate generation 3736: reward = -80.53, steps = 180\n",
      "11:19:49 [DEBUG] evaluate generation 3737: reward = -41.97, steps = 506\n",
      "11:19:54 [DEBUG] evaluate generation 3738: reward = -81.45, steps = 178\n",
      "11:19:59 [DEBUG] evaluate generation 3739: reward = 4.88, steps = 927\n",
      "11:20:06 [DEBUG] evaluate generation 3740: reward = -74.41, steps = 229\n",
      "11:20:15 [DEBUG] evaluate generation 3741: reward = 184.68, steps = 1600\n",
      "11:20:21 [DEBUG] evaluate generation 3742: reward = 169.27, steps = 1600\n",
      "11:20:26 [DEBUG] evaluate generation 3743: reward = -48.69, steps = 465\n",
      "11:20:30 [DEBUG] evaluate generation 3744: reward = -32.08, steps = 621\n",
      "11:20:37 [DEBUG] evaluate generation 3745: reward = -78.77, steps = 181\n",
      "11:20:45 [DEBUG] evaluate generation 3746: reward = 33.77, steps = 1202\n",
      "11:20:51 [DEBUG] evaluate generation 3747: reward = -6.54, steps = 833\n",
      "11:20:58 [DEBUG] evaluate generation 3748: reward = -36.07, steps = 601\n",
      "11:21:05 [DEBUG] evaluate generation 3749: reward = -40.70, steps = 503\n",
      "11:21:13 [DEBUG] evaluate generation 3750: reward = 197.80, steps = 1600\n",
      "11:21:19 [DEBUG] evaluate generation 3751: reward = -79.52, steps = 178\n",
      "11:21:24 [DEBUG] evaluate generation 3752: reward = 187.93, steps = 1600\n",
      "11:21:30 [DEBUG] evaluate generation 3753: reward = 7.45, steps = 938\n",
      "11:21:35 [DEBUG] evaluate generation 3754: reward = 26.78, steps = 1005\n",
      "11:21:39 [DEBUG] evaluate generation 3755: reward = -67.40, steps = 284\n",
      "11:21:46 [DEBUG] evaluate generation 3756: reward = 183.49, steps = 1600\n",
      "11:21:52 [DEBUG] evaluate generation 3757: reward = -29.46, steps = 613\n",
      "11:21:58 [DEBUG] evaluate generation 3758: reward = -80.30, steps = 182\n",
      "11:22:04 [DEBUG] evaluate generation 3759: reward = 67.05, steps = 1382\n",
      "11:22:10 [DEBUG] evaluate generation 3760: reward = -55.65, steps = 383\n",
      "11:22:17 [DEBUG] evaluate generation 3761: reward = 194.53, steps = 1600\n",
      "11:22:23 [DEBUG] evaluate generation 3762: reward = 59.87, steps = 1326\n",
      "11:22:29 [DEBUG] evaluate generation 3763: reward = 197.96, steps = 1600\n",
      "11:22:35 [DEBUG] evaluate generation 3764: reward = -0.82, steps = 719\n",
      "11:22:40 [DEBUG] evaluate generation 3765: reward = 31.21, steps = 1054\n",
      "11:22:45 [DEBUG] evaluate generation 3766: reward = 194.11, steps = 1600\n",
      "11:22:48 [DEBUG] evaluate generation 3767: reward = -28.99, steps = 597\n",
      "11:22:53 [DEBUG] evaluate generation 3768: reward = -80.66, steps = 176\n",
      "11:22:59 [DEBUG] evaluate generation 3769: reward = -75.49, steps = 226\n",
      "11:23:05 [DEBUG] evaluate generation 3770: reward = 191.54, steps = 1600\n",
      "11:23:12 [DEBUG] evaluate generation 3771: reward = 192.71, steps = 1600\n",
      "11:23:18 [DEBUG] evaluate generation 3772: reward = -59.15, steps = 340\n",
      "11:23:26 [DEBUG] evaluate generation 3773: reward = -20.31, steps = 604\n",
      "11:23:33 [DEBUG] evaluate generation 3774: reward = 203.91, steps = 1600\n",
      "11:23:40 [DEBUG] evaluate generation 3775: reward = 75.55, steps = 1369\n",
      "11:23:46 [DEBUG] evaluate generation 3776: reward = -79.16, steps = 178\n",
      "11:23:51 [DEBUG] evaluate generation 3777: reward = 199.92, steps = 1600\n",
      "11:23:57 [DEBUG] evaluate generation 3778: reward = -10.00, steps = 743\n",
      "11:24:03 [DEBUG] evaluate generation 3779: reward = 95.13, steps = 1450\n",
      "11:24:10 [DEBUG] evaluate generation 3780: reward = 15.29, steps = 864\n",
      "11:24:14 [DEBUG] evaluate generation 3781: reward = -8.99, steps = 698\n",
      "11:24:20 [DEBUG] evaluate generation 3782: reward = -46.56, steps = 433\n",
      "11:24:27 [DEBUG] evaluate generation 3783: reward = 26.13, steps = 972\n",
      "11:24:32 [DEBUG] evaluate generation 3784: reward = 48.07, steps = 1113\n",
      "11:24:37 [DEBUG] evaluate generation 3785: reward = -52.21, steps = 380\n",
      "11:24:43 [DEBUG] evaluate generation 3786: reward = 19.96, steps = 1227\n",
      "11:24:49 [DEBUG] evaluate generation 3787: reward = -8.02, steps = 727\n",
      "11:24:55 [DEBUG] evaluate generation 3788: reward = -70.96, steps = 260\n",
      "11:25:01 [DEBUG] evaluate generation 3789: reward = -51.50, steps = 746\n",
      "11:25:08 [DEBUG] evaluate generation 3790: reward = 192.16, steps = 1600\n",
      "11:25:13 [DEBUG] evaluate generation 3791: reward = -73.18, steps = 220\n",
      "11:25:19 [DEBUG] evaluate generation 3792: reward = -79.42, steps = 181\n",
      "11:25:27 [DEBUG] evaluate generation 3793: reward = 202.96, steps = 1600\n",
      "11:25:35 [DEBUG] evaluate generation 3794: reward = -36.33, steps = 476\n",
      "11:25:43 [DEBUG] evaluate generation 3795: reward = 204.78, steps = 1600\n",
      "11:25:49 [DEBUG] evaluate generation 3796: reward = -47.08, steps = 374\n",
      "11:25:55 [DEBUG] evaluate generation 3797: reward = 9.45, steps = 838\n",
      "11:26:02 [DEBUG] evaluate generation 3798: reward = -48.24, steps = 399\n",
      "11:26:08 [DEBUG] evaluate generation 3799: reward = -27.97, steps = 547\n",
      "11:26:14 [DEBUG] evaluate generation 3800: reward = -72.99, steps = 219\n",
      "11:26:21 [DEBUG] evaluate generation 3801: reward = 68.78, steps = 1241\n",
      "11:26:27 [DEBUG] evaluate generation 3802: reward = 85.43, steps = 1435\n",
      "11:26:34 [DEBUG] evaluate generation 3803: reward = -0.43, steps = 787\n",
      "11:26:40 [DEBUG] evaluate generation 3804: reward = 197.05, steps = 1600\n",
      "11:26:46 [DEBUG] evaluate generation 3805: reward = 16.67, steps = 926\n",
      "11:26:53 [DEBUG] evaluate generation 3806: reward = -49.91, steps = 401\n",
      "11:27:00 [DEBUG] evaluate generation 3807: reward = 52.33, steps = 1233\n",
      "11:27:08 [DEBUG] evaluate generation 3808: reward = 197.65, steps = 1600\n",
      "11:27:15 [DEBUG] evaluate generation 3809: reward = 194.63, steps = 1600\n",
      "11:27:22 [DEBUG] evaluate generation 3810: reward = -73.08, steps = 226\n",
      "11:27:28 [DEBUG] evaluate generation 3811: reward = 35.95, steps = 1124\n",
      "11:27:33 [DEBUG] evaluate generation 3812: reward = -60.62, steps = 331\n",
      "11:27:38 [DEBUG] evaluate generation 3813: reward = 182.41, steps = 1600\n",
      "11:27:46 [DEBUG] evaluate generation 3814: reward = 183.31, steps = 1600\n",
      "11:27:52 [DEBUG] evaluate generation 3815: reward = -54.83, steps = 409\n",
      "11:27:56 [DEBUG] evaluate generation 3816: reward = 188.88, steps = 1600\n",
      "11:28:03 [DEBUG] evaluate generation 3817: reward = 190.13, steps = 1600\n",
      "11:28:09 [DEBUG] evaluate generation 3818: reward = 24.39, steps = 1065\n",
      "11:28:18 [DEBUG] evaluate generation 3819: reward = -72.64, steps = 251\n",
      "11:28:23 [DEBUG] evaluate generation 3820: reward = -16.14, steps = 690\n",
      "11:28:28 [DEBUG] evaluate generation 3821: reward = 188.85, steps = 1600\n",
      "11:28:35 [DEBUG] evaluate generation 3822: reward = 47.79, steps = 1325\n",
      "11:28:43 [DEBUG] evaluate generation 3823: reward = -30.78, steps = 617\n",
      "11:28:51 [DEBUG] evaluate generation 3824: reward = 181.87, steps = 1600\n",
      "11:28:59 [DEBUG] evaluate generation 3825: reward = 175.30, steps = 1600\n",
      "11:29:05 [DEBUG] evaluate generation 3826: reward = -74.60, steps = 225\n",
      "11:29:15 [DEBUG] evaluate generation 3827: reward = 168.26, steps = 1600\n",
      "11:29:23 [DEBUG] evaluate generation 3828: reward = -11.97, steps = 767\n",
      "11:29:33 [DEBUG] evaluate generation 3829: reward = 6.01, steps = 947\n",
      "11:29:41 [DEBUG] evaluate generation 3830: reward = 39.56, steps = 1272\n",
      "11:29:48 [DEBUG] evaluate generation 3831: reward = -34.28, steps = 445\n",
      "11:29:56 [DEBUG] evaluate generation 3832: reward = -8.84, steps = 812\n",
      "11:30:03 [DEBUG] evaluate generation 3833: reward = -33.87, steps = 583\n",
      "11:30:11 [DEBUG] evaluate generation 3834: reward = 45.45, steps = 1317\n",
      "11:30:16 [DEBUG] evaluate generation 3835: reward = 0.78, steps = 921\n",
      "11:30:22 [DEBUG] evaluate generation 3836: reward = 61.25, steps = 1391\n",
      "11:30:31 [DEBUG] evaluate generation 3837: reward = 173.87, steps = 1600\n",
      "11:30:38 [DEBUG] evaluate generation 3838: reward = 171.82, steps = 1600\n",
      "11:30:43 [DEBUG] evaluate generation 3839: reward = -18.32, steps = 635\n",
      "11:30:52 [DEBUG] evaluate generation 3840: reward = 179.85, steps = 1600\n",
      "11:31:00 [DEBUG] evaluate generation 3841: reward = 87.58, steps = 1551\n",
      "11:31:07 [DEBUG] evaluate generation 3842: reward = -14.48, steps = 728\n",
      "11:31:16 [DEBUG] evaluate generation 3843: reward = 173.39, steps = 1600\n",
      "11:31:23 [DEBUG] evaluate generation 3844: reward = 168.54, steps = 1600\n",
      "11:31:32 [DEBUG] evaluate generation 3845: reward = 181.55, steps = 1600\n",
      "11:31:40 [DEBUG] evaluate generation 3846: reward = -38.06, steps = 543\n",
      "11:31:48 [DEBUG] evaluate generation 3847: reward = 171.77, steps = 1600\n",
      "11:31:56 [DEBUG] evaluate generation 3848: reward = 53.60, steps = 1409\n",
      "11:32:03 [DEBUG] evaluate generation 3849: reward = 179.89, steps = 1600\n",
      "11:32:11 [DEBUG] evaluate generation 3850: reward = -33.37, steps = 605\n",
      "11:32:24 [DEBUG] evaluate generation 3851: reward = 171.76, steps = 1600\n",
      "11:32:34 [DEBUG] evaluate generation 3852: reward = 166.86, steps = 1600\n",
      "11:32:46 [DEBUG] evaluate generation 3853: reward = 178.03, steps = 1600\n",
      "11:32:54 [DEBUG] evaluate generation 3854: reward = 172.69, steps = 1600\n",
      "11:33:02 [DEBUG] evaluate generation 3855: reward = 48.95, steps = 1372\n",
      "11:33:11 [DEBUG] evaluate generation 3856: reward = 60.90, steps = 1403\n",
      "11:33:21 [DEBUG] evaluate generation 3857: reward = 176.43, steps = 1600\n",
      "11:33:31 [DEBUG] evaluate generation 3858: reward = 177.98, steps = 1600\n",
      "11:33:39 [DEBUG] evaluate generation 3859: reward = -50.67, steps = 442\n",
      "11:33:45 [DEBUG] evaluate generation 3860: reward = -68.65, steps = 273\n",
      "11:33:55 [DEBUG] evaluate generation 3861: reward = 183.58, steps = 1600\n",
      "11:34:01 [DEBUG] evaluate generation 3862: reward = 185.95, steps = 1600\n",
      "11:34:08 [DEBUG] evaluate generation 3863: reward = 182.48, steps = 1600\n",
      "11:34:18 [DEBUG] evaluate generation 3864: reward = 27.41, steps = 1077\n",
      "11:34:26 [DEBUG] evaluate generation 3865: reward = -39.64, steps = 538\n",
      "11:34:37 [DEBUG] evaluate generation 3866: reward = 177.27, steps = 1600\n",
      "11:34:46 [DEBUG] evaluate generation 3867: reward = 179.03, steps = 1600\n",
      "11:34:52 [DEBUG] evaluate generation 3868: reward = 174.02, steps = 1600\n",
      "11:35:01 [DEBUG] evaluate generation 3869: reward = 178.18, steps = 1600\n",
      "11:35:10 [DEBUG] evaluate generation 3870: reward = 177.96, steps = 1600\n",
      "11:35:18 [DEBUG] evaluate generation 3871: reward = 182.13, steps = 1600\n",
      "11:35:24 [DEBUG] evaluate generation 3872: reward = 184.70, steps = 1600\n",
      "11:35:34 [DEBUG] evaluate generation 3873: reward = 182.81, steps = 1600\n",
      "11:35:40 [DEBUG] evaluate generation 3874: reward = 178.95, steps = 1600\n",
      "11:35:49 [DEBUG] evaluate generation 3875: reward = 184.76, steps = 1600\n",
      "11:35:58 [DEBUG] evaluate generation 3876: reward = 180.64, steps = 1600\n",
      "11:36:08 [DEBUG] evaluate generation 3877: reward = 180.17, steps = 1600\n",
      "11:36:15 [DEBUG] evaluate generation 3878: reward = -67.33, steps = 291\n",
      "11:36:22 [DEBUG] evaluate generation 3879: reward = 166.02, steps = 1600\n",
      "11:36:33 [DEBUG] evaluate generation 3880: reward = 172.10, steps = 1600\n",
      "11:36:41 [DEBUG] evaluate generation 3881: reward = 177.79, steps = 1600\n",
      "11:36:50 [DEBUG] evaluate generation 3882: reward = 60.55, steps = 1477\n",
      "11:36:58 [DEBUG] evaluate generation 3883: reward = 171.99, steps = 1600\n",
      "11:37:02 [DEBUG] evaluate generation 3884: reward = -8.63, steps = 823\n",
      "11:37:09 [DEBUG] evaluate generation 3885: reward = 7.96, steps = 991\n",
      "11:37:17 [DEBUG] evaluate generation 3886: reward = 55.59, steps = 1371\n",
      "11:37:26 [DEBUG] evaluate generation 3887: reward = 179.04, steps = 1600\n",
      "11:37:35 [DEBUG] evaluate generation 3888: reward = 177.26, steps = 1600\n",
      "11:37:45 [DEBUG] evaluate generation 3889: reward = 53.60, steps = 1332\n",
      "11:37:54 [DEBUG] evaluate generation 3890: reward = 31.82, steps = 1278\n",
      "11:38:01 [DEBUG] evaluate generation 3891: reward = 181.75, steps = 1600\n",
      "11:38:10 [DEBUG] evaluate generation 3892: reward = 173.97, steps = 1600\n",
      "11:38:17 [DEBUG] evaluate generation 3893: reward = 164.51, steps = 1600\n",
      "11:38:27 [DEBUG] evaluate generation 3894: reward = 172.85, steps = 1600\n",
      "11:38:37 [DEBUG] evaluate generation 3895: reward = 170.32, steps = 1600\n",
      "11:38:45 [DEBUG] evaluate generation 3896: reward = -46.63, steps = 471\n",
      "11:38:54 [DEBUG] evaluate generation 3897: reward = 177.38, steps = 1600\n",
      "11:39:06 [DEBUG] evaluate generation 3898: reward = 177.66, steps = 1600\n",
      "11:39:12 [DEBUG] evaluate generation 3899: reward = 185.73, steps = 1600\n",
      "11:39:19 [DEBUG] evaluate generation 3900: reward = 180.36, steps = 1600\n",
      "11:39:29 [DEBUG] evaluate generation 3901: reward = 182.97, steps = 1600\n",
      "11:39:37 [DEBUG] evaluate generation 3902: reward = 181.17, steps = 1600\n",
      "11:39:45 [DEBUG] evaluate generation 3903: reward = 178.30, steps = 1600\n",
      "11:39:53 [DEBUG] evaluate generation 3904: reward = 175.53, steps = 1600\n",
      "11:40:02 [DEBUG] evaluate generation 3905: reward = 79.02, steps = 1570\n",
      "11:40:10 [DEBUG] evaluate generation 3906: reward = 176.67, steps = 1600\n",
      "11:40:22 [DEBUG] evaluate generation 3907: reward = 6.93, steps = 936\n",
      "11:40:30 [DEBUG] evaluate generation 3908: reward = 179.72, steps = 1600\n",
      "11:40:38 [DEBUG] evaluate generation 3909: reward = 177.41, steps = 1600\n",
      "11:40:48 [DEBUG] evaluate generation 3910: reward = 182.86, steps = 1600\n",
      "11:40:56 [DEBUG] evaluate generation 3911: reward = 177.35, steps = 1600\n",
      "11:41:07 [DEBUG] evaluate generation 3912: reward = 174.43, steps = 1600\n",
      "11:41:17 [DEBUG] evaluate generation 3913: reward = 180.07, steps = 1600\n",
      "11:41:25 [DEBUG] evaluate generation 3914: reward = 173.02, steps = 1600\n",
      "11:41:33 [DEBUG] evaluate generation 3915: reward = 180.56, steps = 1600\n",
      "11:41:43 [DEBUG] evaluate generation 3916: reward = 174.77, steps = 1600\n",
      "11:41:52 [DEBUG] evaluate generation 3917: reward = 179.75, steps = 1600\n",
      "11:41:59 [DEBUG] evaluate generation 3918: reward = 173.22, steps = 1600\n",
      "11:42:10 [DEBUG] evaluate generation 3919: reward = 168.04, steps = 1600\n",
      "11:42:18 [DEBUG] evaluate generation 3920: reward = -13.48, steps = 782\n",
      "11:42:27 [DEBUG] evaluate generation 3921: reward = 178.72, steps = 1600\n",
      "11:42:34 [DEBUG] evaluate generation 3922: reward = 177.35, steps = 1600\n",
      "11:42:43 [DEBUG] evaluate generation 3923: reward = 168.93, steps = 1600\n",
      "11:42:52 [DEBUG] evaluate generation 3924: reward = 172.01, steps = 1600\n",
      "11:43:03 [DEBUG] evaluate generation 3925: reward = 174.36, steps = 1600\n",
      "11:43:15 [DEBUG] evaluate generation 3926: reward = -18.09, steps = 730\n",
      "11:43:26 [DEBUG] evaluate generation 3927: reward = 179.74, steps = 1600\n",
      "11:43:35 [DEBUG] evaluate generation 3928: reward = -20.16, steps = 652\n",
      "11:43:45 [DEBUG] evaluate generation 3929: reward = 179.66, steps = 1600\n",
      "11:43:56 [DEBUG] evaluate generation 3930: reward = 176.46, steps = 1600\n",
      "11:44:06 [DEBUG] evaluate generation 3931: reward = 176.81, steps = 1600\n",
      "11:44:16 [DEBUG] evaluate generation 3932: reward = 180.77, steps = 1600\n",
      "11:44:27 [DEBUG] evaluate generation 3933: reward = 177.40, steps = 1600\n",
      "11:44:35 [DEBUG] evaluate generation 3934: reward = 178.25, steps = 1600\n",
      "11:44:44 [DEBUG] evaluate generation 3935: reward = 177.56, steps = 1600\n",
      "11:44:53 [DEBUG] evaluate generation 3936: reward = 179.01, steps = 1600\n",
      "11:45:02 [DEBUG] evaluate generation 3937: reward = 168.53, steps = 1600\n",
      "11:45:14 [DEBUG] evaluate generation 3938: reward = 171.01, steps = 1600\n",
      "11:45:24 [DEBUG] evaluate generation 3939: reward = 174.93, steps = 1600\n",
      "11:45:34 [DEBUG] evaluate generation 3940: reward = 186.65, steps = 1600\n",
      "11:45:44 [DEBUG] evaluate generation 3941: reward = 182.87, steps = 1600\n",
      "11:45:53 [DEBUG] evaluate generation 3942: reward = 178.71, steps = 1600\n",
      "11:46:02 [DEBUG] evaluate generation 3943: reward = 180.61, steps = 1600\n",
      "11:46:13 [DEBUG] evaluate generation 3944: reward = 184.26, steps = 1600\n",
      "11:46:23 [DEBUG] evaluate generation 3945: reward = 198.18, steps = 1600\n",
      "11:46:30 [DEBUG] evaluate generation 3946: reward = 181.25, steps = 1600\n",
      "11:46:41 [DEBUG] evaluate generation 3947: reward = 187.80, steps = 1600\n",
      "11:46:48 [DEBUG] evaluate generation 3948: reward = -50.73, steps = 423\n",
      "11:46:57 [DEBUG] evaluate generation 3949: reward = 187.38, steps = 1600\n",
      "11:47:06 [DEBUG] evaluate generation 3950: reward = 192.92, steps = 1600\n",
      "11:47:16 [DEBUG] evaluate generation 3951: reward = 67.92, steps = 1384\n",
      "11:47:25 [DEBUG] evaluate generation 3952: reward = 193.27, steps = 1600\n",
      "11:47:36 [DEBUG] evaluate generation 3953: reward = 193.31, steps = 1600\n",
      "11:47:50 [DEBUG] evaluate generation 3954: reward = 196.42, steps = 1600\n",
      "11:48:01 [DEBUG] evaluate generation 3955: reward = 5.01, steps = 868\n",
      "11:48:12 [DEBUG] evaluate generation 3956: reward = 187.10, steps = 1600\n",
      "11:48:20 [DEBUG] evaluate generation 3957: reward = 196.83, steps = 1600\n",
      "11:48:31 [DEBUG] evaluate generation 3958: reward = 14.15, steps = 929\n",
      "11:48:41 [DEBUG] evaluate generation 3959: reward = 194.52, steps = 1600\n",
      "11:48:52 [DEBUG] evaluate generation 3960: reward = 16.69, steps = 965\n",
      "11:49:04 [DEBUG] evaluate generation 3961: reward = 23.60, steps = 1121\n",
      "11:49:13 [DEBUG] evaluate generation 3962: reward = 187.03, steps = 1600\n",
      "11:49:23 [DEBUG] evaluate generation 3963: reward = 181.70, steps = 1600\n",
      "11:49:33 [DEBUG] evaluate generation 3964: reward = 188.30, steps = 1600\n",
      "11:49:43 [DEBUG] evaluate generation 3965: reward = 192.25, steps = 1600\n",
      "11:49:52 [DEBUG] evaluate generation 3966: reward = 182.04, steps = 1600\n",
      "11:50:00 [DEBUG] evaluate generation 3967: reward = 188.04, steps = 1600\n",
      "11:50:09 [DEBUG] evaluate generation 3968: reward = 183.78, steps = 1600\n",
      "11:50:19 [DEBUG] evaluate generation 3969: reward = 186.92, steps = 1600\n",
      "11:50:30 [DEBUG] evaluate generation 3970: reward = 186.91, steps = 1600\n",
      "11:50:41 [DEBUG] evaluate generation 3971: reward = 186.49, steps = 1600\n",
      "11:50:54 [DEBUG] evaluate generation 3972: reward = 193.63, steps = 1600\n",
      "11:51:05 [DEBUG] evaluate generation 3973: reward = 183.45, steps = 1600\n",
      "11:51:13 [DEBUG] evaluate generation 3974: reward = 47.23, steps = 1173\n",
      "11:51:25 [DEBUG] evaluate generation 3975: reward = 194.24, steps = 1600\n",
      "11:51:33 [DEBUG] evaluate generation 3976: reward = 183.84, steps = 1600\n",
      "11:51:41 [DEBUG] evaluate generation 3977: reward = 189.91, steps = 1600\n",
      "11:51:53 [DEBUG] evaluate generation 3978: reward = 185.93, steps = 1600\n",
      "11:52:05 [DEBUG] evaluate generation 3979: reward = 187.41, steps = 1600\n",
      "11:52:16 [DEBUG] evaluate generation 3980: reward = 177.10, steps = 1600\n",
      "11:52:24 [DEBUG] evaluate generation 3981: reward = 183.78, steps = 1600\n",
      "11:52:34 [DEBUG] evaluate generation 3982: reward = 188.56, steps = 1600\n",
      "11:52:44 [DEBUG] evaluate generation 3983: reward = 179.74, steps = 1600\n",
      "11:52:54 [DEBUG] evaluate generation 3984: reward = 177.67, steps = 1600\n",
      "11:53:05 [DEBUG] evaluate generation 3985: reward = 66.40, steps = 1460\n",
      "11:53:15 [DEBUG] evaluate generation 3986: reward = 172.83, steps = 1600\n",
      "11:53:25 [DEBUG] evaluate generation 3987: reward = 178.78, steps = 1600\n",
      "11:53:34 [DEBUG] evaluate generation 3988: reward = 181.14, steps = 1600\n",
      "11:53:42 [DEBUG] evaluate generation 3989: reward = 178.33, steps = 1600\n",
      "11:53:54 [DEBUG] evaluate generation 3990: reward = 185.25, steps = 1600\n",
      "11:54:03 [DEBUG] evaluate generation 3991: reward = 180.03, steps = 1600\n",
      "11:54:12 [DEBUG] evaluate generation 3992: reward = 185.36, steps = 1600\n",
      "11:54:23 [DEBUG] evaluate generation 3993: reward = 182.84, steps = 1600\n",
      "11:54:34 [DEBUG] evaluate generation 3994: reward = 176.42, steps = 1600\n",
      "11:54:41 [DEBUG] evaluate generation 3995: reward = 182.57, steps = 1600\n",
      "11:54:54 [DEBUG] evaluate generation 3996: reward = 181.19, steps = 1600\n",
      "11:55:06 [DEBUG] evaluate generation 3997: reward = 183.40, steps = 1600\n",
      "11:55:15 [DEBUG] evaluate generation 3998: reward = 190.77, steps = 1600\n",
      "11:55:26 [DEBUG] evaluate generation 3999: reward = 182.99, steps = 1600\n",
      "11:55:38 [DEBUG] evaluate generation 4000: reward = 187.02, steps = 1600\n",
      "11:55:50 [DEBUG] evaluate generation 4001: reward = 189.74, steps = 1600\n",
      "11:56:02 [DEBUG] evaluate generation 4002: reward = 184.19, steps = 1600\n",
      "11:56:12 [DEBUG] evaluate generation 4003: reward = 183.55, steps = 1600\n",
      "11:56:22 [DEBUG] evaluate generation 4004: reward = 180.98, steps = 1600\n",
      "11:56:31 [DEBUG] evaluate generation 4005: reward = 179.02, steps = 1600\n",
      "11:56:42 [DEBUG] evaluate generation 4006: reward = 179.67, steps = 1600\n",
      "11:56:54 [DEBUG] evaluate generation 4007: reward = 179.78, steps = 1600\n",
      "11:57:06 [DEBUG] evaluate generation 4008: reward = 182.91, steps = 1600\n",
      "11:57:18 [DEBUG] evaluate generation 4009: reward = 181.02, steps = 1600\n",
      "11:57:28 [DEBUG] evaluate generation 4010: reward = 181.46, steps = 1600\n",
      "11:57:39 [DEBUG] evaluate generation 4011: reward = 188.79, steps = 1600\n",
      "11:57:51 [DEBUG] evaluate generation 4012: reward = 189.50, steps = 1600\n",
      "11:58:00 [DEBUG] evaluate generation 4013: reward = -8.29, steps = 751\n",
      "11:58:13 [DEBUG] evaluate generation 4014: reward = 195.76, steps = 1600\n",
      "11:58:22 [DEBUG] evaluate generation 4015: reward = 199.60, steps = 1600\n",
      "11:58:33 [DEBUG] evaluate generation 4016: reward = 195.19, steps = 1600\n",
      "11:58:42 [DEBUG] evaluate generation 4017: reward = 207.75, steps = 1600\n",
      "11:58:55 [DEBUG] evaluate generation 4018: reward = 200.67, steps = 1600\n",
      "11:59:05 [DEBUG] evaluate generation 4019: reward = 211.62, steps = 1600\n",
      "11:59:14 [DEBUG] evaluate generation 4020: reward = 199.01, steps = 1600\n",
      "11:59:24 [DEBUG] evaluate generation 4021: reward = 197.72, steps = 1600\n",
      "11:59:34 [DEBUG] evaluate generation 4022: reward = 201.24, steps = 1600\n",
      "11:59:45 [DEBUG] evaluate generation 4023: reward = 195.45, steps = 1600\n",
      "11:59:56 [DEBUG] evaluate generation 4024: reward = 199.98, steps = 1600\n",
      "12:00:06 [DEBUG] evaluate generation 4025: reward = 197.27, steps = 1600\n",
      "12:00:14 [DEBUG] evaluate generation 4026: reward = 195.14, steps = 1600\n",
      "12:00:25 [DEBUG] evaluate generation 4027: reward = 189.52, steps = 1600\n",
      "12:00:35 [DEBUG] evaluate generation 4028: reward = 38.43, steps = 1192\n",
      "12:00:44 [DEBUG] evaluate generation 4029: reward = 186.69, steps = 1600\n",
      "12:00:54 [DEBUG] evaluate generation 4030: reward = 192.93, steps = 1600\n",
      "12:01:04 [DEBUG] evaluate generation 4031: reward = 183.36, steps = 1600\n",
      "12:01:15 [DEBUG] evaluate generation 4032: reward = 185.72, steps = 1600\n",
      "12:01:30 [DEBUG] evaluate generation 4033: reward = 186.20, steps = 1600\n",
      "12:01:40 [DEBUG] evaluate generation 4034: reward = 190.22, steps = 1600\n",
      "12:01:51 [DEBUG] evaluate generation 4035: reward = 195.86, steps = 1600\n",
      "12:02:02 [DEBUG] evaluate generation 4036: reward = 191.25, steps = 1600\n",
      "12:02:12 [DEBUG] evaluate generation 4037: reward = 197.93, steps = 1600\n",
      "12:02:23 [DEBUG] evaluate generation 4038: reward = 191.39, steps = 1600\n",
      "12:02:37 [DEBUG] evaluate generation 4039: reward = 198.19, steps = 1600\n",
      "12:02:47 [DEBUG] evaluate generation 4040: reward = 196.05, steps = 1600\n",
      "12:02:57 [DEBUG] evaluate generation 4041: reward = 192.52, steps = 1600\n",
      "12:03:07 [DEBUG] evaluate generation 4042: reward = 194.17, steps = 1600\n",
      "12:03:19 [DEBUG] evaluate generation 4043: reward = 186.55, steps = 1600\n",
      "12:03:29 [DEBUG] evaluate generation 4044: reward = 184.55, steps = 1600\n",
      "12:03:38 [DEBUG] evaluate generation 4045: reward = 189.61, steps = 1600\n",
      "12:03:50 [DEBUG] evaluate generation 4046: reward = 191.65, steps = 1600\n",
      "12:04:00 [DEBUG] evaluate generation 4047: reward = 177.95, steps = 1600\n",
      "12:04:13 [DEBUG] evaluate generation 4048: reward = 188.13, steps = 1600\n",
      "12:04:25 [DEBUG] evaluate generation 4049: reward = 185.96, steps = 1600\n",
      "12:04:36 [DEBUG] evaluate generation 4050: reward = 195.39, steps = 1600\n",
      "12:04:49 [DEBUG] evaluate generation 4051: reward = 191.68, steps = 1600\n",
      "12:05:03 [DEBUG] evaluate generation 4052: reward = 194.45, steps = 1600\n",
      "12:05:14 [DEBUG] evaluate generation 4053: reward = 201.44, steps = 1600\n",
      "12:05:27 [DEBUG] evaluate generation 4054: reward = 193.78, steps = 1600\n",
      "12:05:37 [DEBUG] evaluate generation 4055: reward = 193.37, steps = 1600\n",
      "12:05:49 [DEBUG] evaluate generation 4056: reward = 189.01, steps = 1600\n",
      "12:06:02 [DEBUG] evaluate generation 4057: reward = 198.74, steps = 1600\n",
      "12:06:12 [DEBUG] evaluate generation 4058: reward = 192.61, steps = 1600\n",
      "12:06:22 [DEBUG] evaluate generation 4059: reward = 88.54, steps = 1547\n",
      "12:06:35 [DEBUG] evaluate generation 4060: reward = 194.78, steps = 1600\n",
      "12:06:45 [DEBUG] evaluate generation 4061: reward = 184.60, steps = 1600\n",
      "12:06:57 [DEBUG] evaluate generation 4062: reward = 185.75, steps = 1600\n",
      "12:07:07 [DEBUG] evaluate generation 4063: reward = 188.17, steps = 1600\n",
      "12:07:19 [DEBUG] evaluate generation 4064: reward = 194.62, steps = 1600\n",
      "12:07:32 [DEBUG] evaluate generation 4065: reward = 189.28, steps = 1600\n",
      "12:07:47 [DEBUG] evaluate generation 4066: reward = 195.55, steps = 1600\n",
      "12:07:57 [DEBUG] evaluate generation 4067: reward = 194.68, steps = 1600\n",
      "12:08:10 [DEBUG] evaluate generation 4068: reward = 195.15, steps = 1600\n",
      "12:08:21 [DEBUG] evaluate generation 4069: reward = 197.21, steps = 1600\n",
      "12:08:33 [DEBUG] evaluate generation 4070: reward = 197.80, steps = 1600\n",
      "12:08:44 [DEBUG] evaluate generation 4071: reward = 195.82, steps = 1600\n",
      "12:08:55 [DEBUG] evaluate generation 4072: reward = 188.40, steps = 1600\n",
      "12:09:11 [DEBUG] evaluate generation 4073: reward = 200.49, steps = 1600\n",
      "12:09:23 [DEBUG] evaluate generation 4074: reward = -68.74, steps = 253\n",
      "12:09:33 [DEBUG] evaluate generation 4075: reward = 204.01, steps = 1600\n",
      "12:09:44 [DEBUG] evaluate generation 4076: reward = 201.03, steps = 1600\n",
      "12:09:56 [DEBUG] evaluate generation 4077: reward = 198.43, steps = 1600\n",
      "12:10:07 [DEBUG] evaluate generation 4078: reward = 196.05, steps = 1600\n",
      "12:10:19 [DEBUG] evaluate generation 4079: reward = 193.90, steps = 1600\n",
      "12:10:30 [DEBUG] evaluate generation 4080: reward = 194.53, steps = 1600\n",
      "12:10:40 [DEBUG] evaluate generation 4081: reward = 181.97, steps = 1600\n",
      "12:10:51 [DEBUG] evaluate generation 4082: reward = 187.59, steps = 1600\n",
      "12:11:01 [DEBUG] evaluate generation 4083: reward = 195.68, steps = 1600\n",
      "12:11:12 [DEBUG] evaluate generation 4084: reward = 194.18, steps = 1600\n",
      "12:11:28 [DEBUG] evaluate generation 4085: reward = 194.26, steps = 1600\n",
      "12:11:42 [DEBUG] evaluate generation 4086: reward = 193.28, steps = 1600\n",
      "12:11:54 [DEBUG] evaluate generation 4087: reward = 207.44, steps = 1600\n",
      "12:12:03 [DEBUG] evaluate generation 4088: reward = 206.00, steps = 1600\n",
      "12:12:15 [DEBUG] evaluate generation 4089: reward = 206.34, steps = 1600\n",
      "12:12:26 [DEBUG] evaluate generation 4090: reward = 208.37, steps = 1600\n",
      "12:12:37 [DEBUG] evaluate generation 4091: reward = 201.99, steps = 1600\n",
      "12:12:48 [DEBUG] evaluate generation 4092: reward = 199.26, steps = 1600\n",
      "12:13:00 [DEBUG] evaluate generation 4093: reward = 198.13, steps = 1600\n",
      "12:13:14 [DEBUG] evaluate generation 4094: reward = 201.15, steps = 1600\n",
      "12:13:25 [DEBUG] evaluate generation 4095: reward = 196.86, steps = 1600\n",
      "12:13:34 [DEBUG] evaluate generation 4096: reward = 204.47, steps = 1600\n",
      "12:13:44 [DEBUG] evaluate generation 4097: reward = 206.26, steps = 1600\n",
      "12:13:56 [DEBUG] evaluate generation 4098: reward = 203.20, steps = 1600\n",
      "12:14:07 [DEBUG] evaluate generation 4099: reward = 204.08, steps = 1600\n",
      "12:14:18 [DEBUG] evaluate generation 4100: reward = 199.99, steps = 1600\n",
      "12:14:31 [DEBUG] evaluate generation 4101: reward = 203.12, steps = 1600\n",
      "12:14:44 [DEBUG] evaluate generation 4102: reward = 193.17, steps = 1600\n",
      "12:14:55 [DEBUG] evaluate generation 4103: reward = 194.27, steps = 1600\n",
      "12:15:05 [DEBUG] evaluate generation 4104: reward = 198.11, steps = 1600\n",
      "12:15:18 [DEBUG] evaluate generation 4105: reward = 200.49, steps = 1600\n",
      "12:15:29 [DEBUG] evaluate generation 4106: reward = 190.04, steps = 1600\n",
      "12:15:41 [DEBUG] evaluate generation 4107: reward = 194.20, steps = 1600\n",
      "12:15:52 [DEBUG] evaluate generation 4108: reward = 195.19, steps = 1600\n",
      "12:16:04 [DEBUG] evaluate generation 4109: reward = 197.28, steps = 1600\n",
      "12:16:15 [DEBUG] evaluate generation 4110: reward = 194.00, steps = 1600\n",
      "12:16:28 [DEBUG] evaluate generation 4111: reward = 211.02, steps = 1600\n",
      "12:16:38 [DEBUG] evaluate generation 4112: reward = 204.65, steps = 1600\n",
      "12:16:49 [DEBUG] evaluate generation 4113: reward = 207.04, steps = 1600\n",
      "12:16:59 [DEBUG] evaluate generation 4114: reward = 205.09, steps = 1600\n",
      "12:17:09 [DEBUG] evaluate generation 4115: reward = 205.86, steps = 1600\n",
      "12:17:22 [DEBUG] evaluate generation 4116: reward = 193.49, steps = 1600\n",
      "12:17:35 [DEBUG] evaluate generation 4117: reward = 200.15, steps = 1600\n",
      "12:17:47 [DEBUG] evaluate generation 4118: reward = 199.88, steps = 1600\n",
      "12:17:58 [DEBUG] evaluate generation 4119: reward = 195.32, steps = 1600\n",
      "12:18:11 [DEBUG] evaluate generation 4120: reward = 200.00, steps = 1600\n",
      "12:18:23 [DEBUG] evaluate generation 4121: reward = 195.35, steps = 1600\n",
      "12:18:36 [DEBUG] evaluate generation 4122: reward = 201.81, steps = 1600\n",
      "12:18:48 [DEBUG] evaluate generation 4123: reward = 213.09, steps = 1600\n",
      "12:19:00 [DEBUG] evaluate generation 4124: reward = 205.67, steps = 1600\n",
      "12:19:14 [DEBUG] evaluate generation 4125: reward = 198.73, steps = 1600\n",
      "12:19:24 [DEBUG] evaluate generation 4126: reward = 194.34, steps = 1600\n",
      "12:19:38 [DEBUG] evaluate generation 4127: reward = 202.99, steps = 1600\n",
      "12:19:50 [DEBUG] evaluate generation 4128: reward = 202.27, steps = 1600\n",
      "12:20:00 [DEBUG] evaluate generation 4129: reward = 199.70, steps = 1600\n",
      "12:20:12 [DEBUG] evaluate generation 4130: reward = 207.30, steps = 1600\n",
      "12:20:23 [DEBUG] evaluate generation 4131: reward = 213.43, steps = 1600\n",
      "12:20:33 [DEBUG] evaluate generation 4132: reward = 201.35, steps = 1600\n",
      "12:20:46 [DEBUG] evaluate generation 4133: reward = 200.56, steps = 1600\n",
      "12:20:58 [DEBUG] evaluate generation 4134: reward = 209.64, steps = 1600\n",
      "12:21:05 [DEBUG] evaluate generation 4135: reward = 206.58, steps = 1600\n",
      "12:21:16 [DEBUG] evaluate generation 4136: reward = 203.41, steps = 1600\n",
      "12:21:29 [DEBUG] evaluate generation 4137: reward = 208.05, steps = 1600\n",
      "12:21:40 [DEBUG] evaluate generation 4138: reward = 209.34, steps = 1600\n",
      "12:21:49 [DEBUG] evaluate generation 4139: reward = 199.16, steps = 1600\n",
      "12:21:59 [DEBUG] evaluate generation 4140: reward = 193.10, steps = 1600\n",
      "12:22:08 [DEBUG] evaluate generation 4141: reward = 199.14, steps = 1600\n",
      "12:22:19 [DEBUG] evaluate generation 4142: reward = 208.53, steps = 1600\n",
      "12:22:33 [DEBUG] evaluate generation 4143: reward = 202.51, steps = 1600\n",
      "12:22:44 [DEBUG] evaluate generation 4144: reward = 201.05, steps = 1600\n",
      "12:22:53 [DEBUG] evaluate generation 4145: reward = 194.35, steps = 1600\n",
      "12:23:05 [DEBUG] evaluate generation 4146: reward = 196.89, steps = 1600\n",
      "12:23:18 [DEBUG] evaluate generation 4147: reward = 189.71, steps = 1600\n",
      "12:23:29 [DEBUG] evaluate generation 4148: reward = 194.37, steps = 1600\n",
      "12:23:39 [DEBUG] evaluate generation 4149: reward = 195.26, steps = 1600\n",
      "12:23:51 [DEBUG] evaluate generation 4150: reward = 203.75, steps = 1600\n",
      "12:24:04 [DEBUG] evaluate generation 4151: reward = 206.02, steps = 1600\n",
      "12:24:13 [DEBUG] evaluate generation 4152: reward = 201.30, steps = 1600\n",
      "12:24:25 [DEBUG] evaluate generation 4153: reward = 200.63, steps = 1600\n",
      "12:24:39 [DEBUG] evaluate generation 4154: reward = 202.44, steps = 1600\n",
      "12:24:49 [DEBUG] evaluate generation 4155: reward = 201.46, steps = 1600\n",
      "12:24:59 [DEBUG] evaluate generation 4156: reward = 199.98, steps = 1600\n",
      "12:25:09 [DEBUG] evaluate generation 4157: reward = 199.83, steps = 1600\n",
      "12:25:20 [DEBUG] evaluate generation 4158: reward = 194.37, steps = 1600\n",
      "12:25:30 [DEBUG] evaluate generation 4159: reward = 191.20, steps = 1600\n",
      "12:25:44 [DEBUG] evaluate generation 4160: reward = 198.38, steps = 1600\n",
      "12:25:57 [DEBUG] evaluate generation 4161: reward = 197.67, steps = 1600\n",
      "12:26:08 [DEBUG] evaluate generation 4162: reward = 196.39, steps = 1600\n",
      "12:26:18 [DEBUG] evaluate generation 4163: reward = 194.53, steps = 1600\n",
      "12:26:30 [DEBUG] evaluate generation 4164: reward = 199.50, steps = 1600\n",
      "12:26:43 [DEBUG] evaluate generation 4165: reward = 204.59, steps = 1600\n",
      "12:26:55 [DEBUG] evaluate generation 4166: reward = 203.37, steps = 1600\n",
      "12:27:06 [DEBUG] evaluate generation 4167: reward = 210.04, steps = 1600\n",
      "12:27:17 [DEBUG] evaluate generation 4168: reward = 214.70, steps = 1600\n",
      "12:27:29 [DEBUG] evaluate generation 4169: reward = 205.33, steps = 1600\n",
      "12:27:42 [DEBUG] evaluate generation 4170: reward = 209.81, steps = 1600\n",
      "12:27:54 [DEBUG] evaluate generation 4171: reward = 216.18, steps = 1600\n",
      "12:28:04 [DEBUG] evaluate generation 4172: reward = 211.44, steps = 1600\n",
      "12:28:16 [DEBUG] evaluate generation 4173: reward = 215.38, steps = 1600\n",
      "12:28:27 [DEBUG] evaluate generation 4174: reward = 201.52, steps = 1600\n",
      "12:28:39 [DEBUG] evaluate generation 4175: reward = 213.03, steps = 1600\n",
      "12:28:51 [DEBUG] evaluate generation 4176: reward = 214.34, steps = 1600\n",
      "12:29:04 [DEBUG] evaluate generation 4177: reward = 220.76, steps = 1600\n",
      "12:29:16 [DEBUG] evaluate generation 4178: reward = 210.70, steps = 1600\n",
      "12:29:26 [DEBUG] evaluate generation 4179: reward = 203.24, steps = 1600\n",
      "12:29:38 [DEBUG] evaluate generation 4180: reward = 208.62, steps = 1600\n",
      "12:29:50 [DEBUG] evaluate generation 4181: reward = 204.57, steps = 1600\n",
      "12:30:02 [DEBUG] evaluate generation 4182: reward = 212.68, steps = 1600\n",
      "12:30:09 [DEBUG] evaluate generation 4183: reward = 204.02, steps = 1600\n",
      "12:30:19 [DEBUG] evaluate generation 4184: reward = 202.22, steps = 1600\n",
      "12:30:32 [DEBUG] evaluate generation 4185: reward = 206.59, steps = 1600\n",
      "12:30:44 [DEBUG] evaluate generation 4186: reward = 209.20, steps = 1600\n",
      "12:30:54 [DEBUG] evaluate generation 4187: reward = 205.27, steps = 1600\n",
      "12:31:05 [DEBUG] evaluate generation 4188: reward = 209.82, steps = 1600\n",
      "12:31:17 [DEBUG] evaluate generation 4189: reward = 203.73, steps = 1600\n",
      "12:31:30 [DEBUG] evaluate generation 4190: reward = 191.72, steps = 1600\n",
      "12:31:43 [DEBUG] evaluate generation 4191: reward = 201.81, steps = 1600\n",
      "12:31:53 [DEBUG] evaluate generation 4192: reward = 195.78, steps = 1600\n",
      "12:32:04 [DEBUG] evaluate generation 4193: reward = 197.60, steps = 1600\n",
      "12:32:16 [DEBUG] evaluate generation 4194: reward = 193.45, steps = 1600\n",
      "12:32:29 [DEBUG] evaluate generation 4195: reward = 201.98, steps = 1600\n",
      "12:32:41 [DEBUG] evaluate generation 4196: reward = 198.00, steps = 1600\n",
      "12:32:54 [DEBUG] evaluate generation 4197: reward = 198.28, steps = 1600\n",
      "12:33:05 [DEBUG] evaluate generation 4198: reward = 200.76, steps = 1600\n",
      "12:33:17 [DEBUG] evaluate generation 4199: reward = 199.84, steps = 1600\n",
      "12:33:32 [DEBUG] evaluate generation 4200: reward = 215.81, steps = 1600\n",
      "12:33:45 [DEBUG] evaluate generation 4201: reward = 210.53, steps = 1600\n",
      "12:33:57 [DEBUG] evaluate generation 4202: reward = 208.49, steps = 1600\n",
      "12:34:08 [DEBUG] evaluate generation 4203: reward = 216.91, steps = 1600\n",
      "12:34:19 [DEBUG] evaluate generation 4204: reward = 218.74, steps = 1600\n",
      "12:34:33 [DEBUG] evaluate generation 4205: reward = 220.19, steps = 1600\n",
      "12:34:44 [DEBUG] evaluate generation 4206: reward = 212.39, steps = 1600\n",
      "12:34:57 [DEBUG] evaluate generation 4207: reward = 207.80, steps = 1600\n",
      "12:35:11 [DEBUG] evaluate generation 4208: reward = 215.90, steps = 1600\n",
      "12:35:20 [DEBUG] evaluate generation 4209: reward = 205.32, steps = 1600\n",
      "12:35:32 [DEBUG] evaluate generation 4210: reward = 196.60, steps = 1600\n",
      "12:35:43 [DEBUG] evaluate generation 4211: reward = 203.84, steps = 1600\n",
      "12:35:53 [DEBUG] evaluate generation 4212: reward = 193.80, steps = 1600\n",
      "12:36:06 [DEBUG] evaluate generation 4213: reward = 204.87, steps = 1600\n",
      "12:36:19 [DEBUG] evaluate generation 4214: reward = 207.05, steps = 1600\n",
      "12:36:32 [DEBUG] evaluate generation 4215: reward = 216.56, steps = 1600\n",
      "12:36:45 [DEBUG] evaluate generation 4216: reward = 213.48, steps = 1600\n",
      "12:36:58 [DEBUG] evaluate generation 4217: reward = 211.65, steps = 1600\n",
      "12:37:08 [DEBUG] evaluate generation 4218: reward = 213.78, steps = 1600\n",
      "12:37:17 [DEBUG] evaluate generation 4219: reward = 214.41, steps = 1600\n",
      "12:37:29 [DEBUG] evaluate generation 4220: reward = 212.71, steps = 1600\n",
      "12:37:41 [DEBUG] evaluate generation 4221: reward = 211.66, steps = 1600\n",
      "12:37:53 [DEBUG] evaluate generation 4222: reward = 214.84, steps = 1600\n",
      "12:38:07 [DEBUG] evaluate generation 4223: reward = 213.36, steps = 1600\n",
      "12:38:17 [DEBUG] evaluate generation 4224: reward = 211.00, steps = 1600\n",
      "12:38:30 [DEBUG] evaluate generation 4225: reward = 215.84, steps = 1600\n",
      "12:38:43 [DEBUG] evaluate generation 4226: reward = 215.38, steps = 1600\n",
      "12:38:54 [DEBUG] evaluate generation 4227: reward = 208.95, steps = 1600\n",
      "12:39:06 [DEBUG] evaluate generation 4228: reward = 218.07, steps = 1600\n",
      "12:39:16 [DEBUG] evaluate generation 4229: reward = 209.61, steps = 1600\n",
      "12:39:29 [DEBUG] evaluate generation 4230: reward = 224.11, steps = 1600\n",
      "12:39:39 [DEBUG] evaluate generation 4231: reward = 218.33, steps = 1600\n",
      "12:39:49 [DEBUG] evaluate generation 4232: reward = 214.93, steps = 1600\n",
      "12:40:04 [DEBUG] evaluate generation 4233: reward = 215.55, steps = 1600\n",
      "12:40:14 [DEBUG] evaluate generation 4234: reward = 217.96, steps = 1600\n",
      "12:40:28 [DEBUG] evaluate generation 4235: reward = 212.59, steps = 1600\n",
      "12:40:40 [DEBUG] evaluate generation 4236: reward = 217.32, steps = 1600\n",
      "12:40:52 [DEBUG] evaluate generation 4237: reward = 210.10, steps = 1600\n",
      "12:41:04 [DEBUG] evaluate generation 4238: reward = 216.88, steps = 1600\n",
      "12:41:16 [DEBUG] evaluate generation 4239: reward = 206.85, steps = 1600\n",
      "12:41:28 [DEBUG] evaluate generation 4240: reward = 224.51, steps = 1600\n",
      "12:41:40 [DEBUG] evaluate generation 4241: reward = 225.55, steps = 1600\n",
      "12:41:52 [DEBUG] evaluate generation 4242: reward = 223.25, steps = 1600\n",
      "12:42:03 [DEBUG] evaluate generation 4243: reward = 227.77, steps = 1600\n",
      "12:42:15 [DEBUG] evaluate generation 4244: reward = 222.63, steps = 1600\n",
      "12:42:25 [DEBUG] evaluate generation 4245: reward = 214.14, steps = 1600\n",
      "12:42:37 [DEBUG] evaluate generation 4246: reward = 221.00, steps = 1600\n",
      "12:42:48 [DEBUG] evaluate generation 4247: reward = 213.06, steps = 1600\n",
      "12:43:00 [DEBUG] evaluate generation 4248: reward = 215.13, steps = 1600\n",
      "12:43:12 [DEBUG] evaluate generation 4249: reward = 205.81, steps = 1600\n",
      "12:43:24 [DEBUG] evaluate generation 4250: reward = 215.83, steps = 1600\n",
      "12:43:34 [DEBUG] evaluate generation 4251: reward = 201.65, steps = 1600\n",
      "12:43:48 [DEBUG] evaluate generation 4252: reward = 212.10, steps = 1600\n",
      "12:44:02 [DEBUG] evaluate generation 4253: reward = 214.05, steps = 1600\n",
      "12:44:14 [DEBUG] evaluate generation 4254: reward = 204.25, steps = 1600\n",
      "12:44:25 [DEBUG] evaluate generation 4255: reward = 202.33, steps = 1600\n",
      "12:44:40 [DEBUG] evaluate generation 4256: reward = 205.63, steps = 1600\n",
      "12:44:53 [DEBUG] evaluate generation 4257: reward = 205.79, steps = 1600\n",
      "12:45:03 [DEBUG] evaluate generation 4258: reward = 202.93, steps = 1600\n",
      "12:45:17 [DEBUG] evaluate generation 4259: reward = 210.38, steps = 1600\n",
      "12:45:30 [DEBUG] evaluate generation 4260: reward = 221.45, steps = 1600\n",
      "12:45:42 [DEBUG] evaluate generation 4261: reward = 217.80, steps = 1600\n",
      "12:45:54 [DEBUG] evaluate generation 4262: reward = 211.72, steps = 1600\n",
      "12:46:04 [DEBUG] evaluate generation 4263: reward = 207.14, steps = 1600\n",
      "12:46:15 [DEBUG] evaluate generation 4264: reward = 214.43, steps = 1600\n",
      "12:46:27 [DEBUG] evaluate generation 4265: reward = 208.83, steps = 1600\n",
      "12:46:40 [DEBUG] evaluate generation 4266: reward = 205.49, steps = 1600\n",
      "12:46:54 [DEBUG] evaluate generation 4267: reward = 216.84, steps = 1600\n",
      "12:47:07 [DEBUG] evaluate generation 4268: reward = 212.66, steps = 1600\n",
      "12:47:18 [DEBUG] evaluate generation 4269: reward = 213.98, steps = 1600\n",
      "12:47:31 [DEBUG] evaluate generation 4270: reward = 218.66, steps = 1600\n",
      "12:47:42 [DEBUG] evaluate generation 4271: reward = 211.15, steps = 1600\n",
      "12:47:56 [DEBUG] evaluate generation 4272: reward = 228.61, steps = 1600\n",
      "12:48:07 [DEBUG] evaluate generation 4273: reward = 219.36, steps = 1600\n",
      "12:48:18 [DEBUG] evaluate generation 4274: reward = 225.00, steps = 1600\n",
      "12:48:30 [DEBUG] evaluate generation 4275: reward = 220.24, steps = 1600\n",
      "12:48:41 [DEBUG] evaluate generation 4276: reward = 213.68, steps = 1600\n",
      "12:48:54 [DEBUG] evaluate generation 4277: reward = 210.70, steps = 1600\n",
      "12:49:06 [DEBUG] evaluate generation 4278: reward = 206.89, steps = 1600\n",
      "12:49:19 [DEBUG] evaluate generation 4279: reward = 216.59, steps = 1600\n",
      "12:49:31 [DEBUG] evaluate generation 4280: reward = 202.71, steps = 1600\n",
      "12:49:43 [DEBUG] evaluate generation 4281: reward = 216.72, steps = 1600\n",
      "12:49:56 [DEBUG] evaluate generation 4282: reward = 223.17, steps = 1600\n",
      "12:50:09 [DEBUG] evaluate generation 4283: reward = 228.61, steps = 1600\n",
      "12:50:22 [DEBUG] evaluate generation 4284: reward = 227.50, steps = 1600\n",
      "12:50:35 [DEBUG] evaluate generation 4285: reward = 223.05, steps = 1600\n",
      "12:50:45 [DEBUG] evaluate generation 4286: reward = 224.46, steps = 1600\n",
      "12:50:55 [DEBUG] evaluate generation 4287: reward = 213.99, steps = 1600\n",
      "12:51:09 [DEBUG] evaluate generation 4288: reward = 219.65, steps = 1600\n",
      "12:51:21 [DEBUG] evaluate generation 4289: reward = 219.17, steps = 1600\n",
      "12:51:32 [DEBUG] evaluate generation 4290: reward = 220.44, steps = 1600\n",
      "12:51:45 [DEBUG] evaluate generation 4291: reward = 216.13, steps = 1600\n",
      "12:51:59 [DEBUG] evaluate generation 4292: reward = 230.58, steps = 1600\n",
      "12:52:13 [DEBUG] evaluate generation 4293: reward = 226.90, steps = 1600\n",
      "12:52:25 [DEBUG] evaluate generation 4294: reward = 221.88, steps = 1600\n",
      "12:52:38 [DEBUG] evaluate generation 4295: reward = 234.49, steps = 1600\n",
      "12:52:52 [DEBUG] evaluate generation 4296: reward = 233.24, steps = 1600\n",
      "12:53:06 [DEBUG] evaluate generation 4297: reward = 236.31, steps = 1600\n",
      "12:53:16 [DEBUG] evaluate generation 4298: reward = 236.12, steps = 1600\n",
      "12:53:29 [DEBUG] evaluate generation 4299: reward = 226.44, steps = 1600\n",
      "12:53:44 [DEBUG] evaluate generation 4300: reward = 231.88, steps = 1600\n",
      "12:53:57 [DEBUG] evaluate generation 4301: reward = 232.21, steps = 1600\n",
      "12:54:10 [DEBUG] evaluate generation 4302: reward = 240.78, steps = 1600\n",
      "12:54:22 [DEBUG] evaluate generation 4303: reward = 243.57, steps = 1600\n",
      "12:54:32 [DEBUG] evaluate generation 4304: reward = 240.49, steps = 1600\n",
      "12:54:45 [DEBUG] evaluate generation 4305: reward = 236.81, steps = 1600\n",
      "12:54:57 [DEBUG] evaluate generation 4306: reward = 243.07, steps = 1600\n",
      "12:55:09 [DEBUG] evaluate generation 4307: reward = 236.26, steps = 1600\n",
      "12:55:20 [DEBUG] evaluate generation 4308: reward = 234.54, steps = 1600\n",
      "12:55:31 [DEBUG] evaluate generation 4309: reward = 226.07, steps = 1600\n",
      "12:55:44 [DEBUG] evaluate generation 4310: reward = 229.80, steps = 1600\n",
      "12:55:58 [DEBUG] evaluate generation 4311: reward = 231.25, steps = 1600\n",
      "12:56:07 [DEBUG] evaluate generation 4312: reward = 227.82, steps = 1600\n",
      "12:56:18 [DEBUG] evaluate generation 4313: reward = 236.67, steps = 1600\n",
      "12:56:30 [DEBUG] evaluate generation 4314: reward = 232.70, steps = 1600\n",
      "12:56:45 [DEBUG] evaluate generation 4315: reward = 231.59, steps = 1600\n",
      "12:56:55 [DEBUG] evaluate generation 4316: reward = 231.15, steps = 1600\n",
      "12:57:09 [DEBUG] evaluate generation 4317: reward = 231.14, steps = 1600\n",
      "12:57:20 [DEBUG] evaluate generation 4318: reward = 232.09, steps = 1600\n",
      "12:57:31 [DEBUG] evaluate generation 4319: reward = 211.23, steps = 1600\n",
      "12:57:42 [DEBUG] evaluate generation 4320: reward = 219.22, steps = 1600\n",
      "12:57:53 [DEBUG] evaluate generation 4321: reward = 215.68, steps = 1600\n",
      "12:58:04 [DEBUG] evaluate generation 4322: reward = 194.79, steps = 1600\n",
      "12:58:18 [DEBUG] evaluate generation 4323: reward = 205.73, steps = 1600\n",
      "12:58:33 [DEBUG] evaluate generation 4324: reward = 217.19, steps = 1600\n",
      "12:58:44 [DEBUG] evaluate generation 4325: reward = 213.20, steps = 1600\n",
      "12:58:57 [DEBUG] evaluate generation 4326: reward = 195.79, steps = 1600\n",
      "12:59:11 [DEBUG] evaluate generation 4327: reward = 209.91, steps = 1600\n",
      "12:59:24 [DEBUG] evaluate generation 4328: reward = 217.53, steps = 1600\n",
      "12:59:36 [DEBUG] evaluate generation 4329: reward = 193.02, steps = 1600\n",
      "12:59:50 [DEBUG] evaluate generation 4330: reward = 212.48, steps = 1600\n",
      "13:00:02 [DEBUG] evaluate generation 4331: reward = 216.04, steps = 1600\n",
      "13:00:18 [DEBUG] evaluate generation 4332: reward = 211.02, steps = 1600\n",
      "13:00:30 [DEBUG] evaluate generation 4333: reward = 214.23, steps = 1600\n",
      "13:00:39 [DEBUG] evaluate generation 4334: reward = 214.54, steps = 1600\n",
      "13:00:52 [DEBUG] evaluate generation 4335: reward = 220.29, steps = 1600\n",
      "13:01:04 [DEBUG] evaluate generation 4336: reward = 222.95, steps = 1600\n",
      "13:01:18 [DEBUG] evaluate generation 4337: reward = 229.80, steps = 1600\n",
      "13:01:31 [DEBUG] evaluate generation 4338: reward = 238.15, steps = 1600\n",
      "13:01:43 [DEBUG] evaluate generation 4339: reward = 234.72, steps = 1600\n",
      "13:01:54 [DEBUG] evaluate generation 4340: reward = 203.91, steps = 1600\n",
      "13:02:07 [DEBUG] evaluate generation 4341: reward = 234.17, steps = 1600\n",
      "13:02:16 [DEBUG] evaluate generation 4342: reward = 230.98, steps = 1600\n",
      "13:02:27 [DEBUG] evaluate generation 4343: reward = 232.31, steps = 1600\n",
      "13:02:40 [DEBUG] evaluate generation 4344: reward = 231.41, steps = 1600\n",
      "13:02:52 [DEBUG] evaluate generation 4345: reward = 226.60, steps = 1600\n",
      "13:03:06 [DEBUG] evaluate generation 4346: reward = 229.23, steps = 1600\n",
      "13:03:17 [DEBUG] evaluate generation 4347: reward = 227.30, steps = 1600\n",
      "13:03:29 [DEBUG] evaluate generation 4348: reward = 221.44, steps = 1600\n",
      "13:03:42 [DEBUG] evaluate generation 4349: reward = 219.56, steps = 1600\n",
      "13:03:54 [DEBUG] evaluate generation 4350: reward = 52.21, steps = 1112\n",
      "13:04:08 [DEBUG] evaluate generation 4351: reward = 224.58, steps = 1600\n",
      "13:04:18 [DEBUG] evaluate generation 4352: reward = 219.78, steps = 1600\n",
      "13:04:29 [DEBUG] evaluate generation 4353: reward = 223.14, steps = 1600\n",
      "13:04:43 [DEBUG] evaluate generation 4354: reward = 230.99, steps = 1600\n",
      "13:04:56 [DEBUG] evaluate generation 4355: reward = 213.24, steps = 1600\n",
      "13:05:10 [DEBUG] evaluate generation 4356: reward = 228.22, steps = 1600\n",
      "13:05:21 [DEBUG] evaluate generation 4357: reward = 220.25, steps = 1600\n",
      "13:05:33 [DEBUG] evaluate generation 4358: reward = 226.86, steps = 1600\n",
      "13:05:46 [DEBUG] evaluate generation 4359: reward = 210.53, steps = 1600\n",
      "13:05:59 [DEBUG] evaluate generation 4360: reward = 220.73, steps = 1600\n",
      "13:06:11 [DEBUG] evaluate generation 4361: reward = 215.18, steps = 1600\n",
      "13:06:24 [DEBUG] evaluate generation 4362: reward = 224.14, steps = 1600\n",
      "13:06:35 [DEBUG] evaluate generation 4363: reward = 217.13, steps = 1600\n",
      "13:06:49 [DEBUG] evaluate generation 4364: reward = 221.69, steps = 1600\n",
      "13:07:00 [DEBUG] evaluate generation 4365: reward = 214.74, steps = 1600\n",
      "13:07:15 [DEBUG] evaluate generation 4366: reward = 217.36, steps = 1600\n",
      "13:07:27 [DEBUG] evaluate generation 4367: reward = 220.90, steps = 1600\n",
      "13:07:38 [DEBUG] evaluate generation 4368: reward = 214.61, steps = 1600\n",
      "13:07:50 [DEBUG] evaluate generation 4369: reward = 209.17, steps = 1600\n",
      "13:08:04 [DEBUG] evaluate generation 4370: reward = 217.52, steps = 1600\n",
      "13:08:18 [DEBUG] evaluate generation 4371: reward = 213.18, steps = 1600\n",
      "13:08:31 [DEBUG] evaluate generation 4372: reward = 211.39, steps = 1600\n",
      "13:08:43 [DEBUG] evaluate generation 4373: reward = 210.29, steps = 1600\n",
      "13:08:55 [DEBUG] evaluate generation 4374: reward = 217.76, steps = 1600\n",
      "13:09:08 [DEBUG] evaluate generation 4375: reward = 222.55, steps = 1600\n",
      "13:09:18 [DEBUG] evaluate generation 4376: reward = 213.47, steps = 1600\n",
      "13:09:30 [DEBUG] evaluate generation 4377: reward = 225.97, steps = 1600\n",
      "13:09:44 [DEBUG] evaluate generation 4378: reward = 224.42, steps = 1600\n",
      "13:09:56 [DEBUG] evaluate generation 4379: reward = 229.32, steps = 1600\n",
      "13:10:11 [DEBUG] evaluate generation 4380: reward = 241.43, steps = 1600\n",
      "13:10:24 [DEBUG] evaluate generation 4381: reward = 231.94, steps = 1600\n",
      "13:10:34 [DEBUG] evaluate generation 4382: reward = 226.77, steps = 1600\n",
      "13:10:46 [DEBUG] evaluate generation 4383: reward = 227.60, steps = 1600\n",
      "13:10:57 [DEBUG] evaluate generation 4384: reward = 230.04, steps = 1600\n",
      "13:11:08 [DEBUG] evaluate generation 4385: reward = 233.10, steps = 1600\n",
      "13:11:20 [DEBUG] evaluate generation 4386: reward = 234.00, steps = 1600\n",
      "13:11:33 [DEBUG] evaluate generation 4387: reward = 235.09, steps = 1600\n",
      "13:11:48 [DEBUG] evaluate generation 4388: reward = 240.74, steps = 1600\n",
      "13:11:57 [DEBUG] evaluate generation 4389: reward = 227.85, steps = 1600\n",
      "13:12:10 [DEBUG] evaluate generation 4390: reward = -20.95, steps = 606\n",
      "13:12:22 [DEBUG] evaluate generation 4391: reward = 243.15, steps = 1600\n",
      "13:12:35 [DEBUG] evaluate generation 4392: reward = 244.72, steps = 1600\n",
      "13:12:46 [DEBUG] evaluate generation 4393: reward = 228.40, steps = 1600\n",
      "13:12:58 [DEBUG] evaluate generation 4394: reward = 233.65, steps = 1600\n",
      "13:13:08 [DEBUG] evaluate generation 4395: reward = 226.20, steps = 1600\n",
      "13:13:20 [DEBUG] evaluate generation 4396: reward = 233.96, steps = 1600\n",
      "13:13:31 [DEBUG] evaluate generation 4397: reward = 233.68, steps = 1600\n",
      "13:13:43 [DEBUG] evaluate generation 4398: reward = 232.53, steps = 1600\n",
      "13:13:56 [DEBUG] evaluate generation 4399: reward = 229.68, steps = 1600\n",
      "13:14:05 [DEBUG] evaluate generation 4400: reward = 225.34, steps = 1600\n",
      "13:14:15 [DEBUG] evaluate generation 4401: reward = 230.58, steps = 1600\n",
      "13:14:28 [DEBUG] evaluate generation 4402: reward = 72.38, steps = 1192\n",
      "13:14:38 [DEBUG] evaluate generation 4403: reward = 225.93, steps = 1600\n",
      "13:14:51 [DEBUG] evaluate generation 4404: reward = 224.28, steps = 1600\n",
      "13:15:03 [DEBUG] evaluate generation 4405: reward = 225.51, steps = 1600\n",
      "13:15:16 [DEBUG] evaluate generation 4406: reward = 225.84, steps = 1600\n",
      "13:15:29 [DEBUG] evaluate generation 4407: reward = 226.47, steps = 1600\n",
      "13:15:42 [DEBUG] evaluate generation 4408: reward = 227.21, steps = 1600\n",
      "13:15:55 [DEBUG] evaluate generation 4409: reward = 239.93, steps = 1600\n",
      "13:16:07 [DEBUG] evaluate generation 4410: reward = 237.98, steps = 1600\n",
      "13:16:20 [DEBUG] evaluate generation 4411: reward = 236.40, steps = 1600\n",
      "13:16:33 [DEBUG] evaluate generation 4412: reward = 224.83, steps = 1600\n",
      "13:16:47 [DEBUG] evaluate generation 4413: reward = 228.80, steps = 1600\n",
      "13:17:01 [DEBUG] evaluate generation 4414: reward = 231.37, steps = 1600\n",
      "13:17:14 [DEBUG] evaluate generation 4415: reward = 239.80, steps = 1600\n",
      "13:17:27 [DEBUG] evaluate generation 4416: reward = 226.32, steps = 1600\n",
      "13:17:42 [DEBUG] evaluate generation 4417: reward = 241.77, steps = 1600\n",
      "13:17:56 [DEBUG] evaluate generation 4418: reward = 234.22, steps = 1600\n",
      "13:18:10 [DEBUG] evaluate generation 4419: reward = 242.50, steps = 1600\n",
      "13:18:20 [DEBUG] evaluate generation 4420: reward = 241.42, steps = 1600\n",
      "13:18:32 [DEBUG] evaluate generation 4421: reward = 232.95, steps = 1600\n",
      "13:18:43 [DEBUG] evaluate generation 4422: reward = 233.55, steps = 1600\n",
      "13:18:55 [DEBUG] evaluate generation 4423: reward = 240.35, steps = 1600\n",
      "13:19:05 [DEBUG] evaluate generation 4424: reward = 238.73, steps = 1600\n",
      "13:19:18 [DEBUG] evaluate generation 4425: reward = 232.58, steps = 1600\n",
      "13:19:29 [DEBUG] evaluate generation 4426: reward = 228.67, steps = 1600\n",
      "13:19:42 [DEBUG] evaluate generation 4427: reward = 228.92, steps = 1600\n",
      "13:19:53 [DEBUG] evaluate generation 4428: reward = 220.34, steps = 1600\n",
      "13:20:08 [DEBUG] evaluate generation 4429: reward = 232.26, steps = 1600\n",
      "13:20:22 [DEBUG] evaluate generation 4430: reward = 230.70, steps = 1600\n",
      "13:20:38 [DEBUG] evaluate generation 4431: reward = 235.76, steps = 1600\n",
      "13:20:53 [DEBUG] evaluate generation 4432: reward = 246.47, steps = 1600\n",
      "13:21:03 [DEBUG] evaluate generation 4433: reward = 245.22, steps = 1600\n",
      "13:21:17 [DEBUG] evaluate generation 4434: reward = 244.65, steps = 1600\n",
      "13:21:28 [DEBUG] evaluate generation 4435: reward = 237.20, steps = 1600\n",
      "13:21:39 [DEBUG] evaluate generation 4436: reward = 234.30, steps = 1600\n",
      "13:21:52 [DEBUG] evaluate generation 4437: reward = 239.10, steps = 1600\n",
      "13:22:05 [DEBUG] evaluate generation 4438: reward = 222.65, steps = 1600\n",
      "13:22:16 [DEBUG] evaluate generation 4439: reward = 237.51, steps = 1600\n",
      "13:22:29 [DEBUG] evaluate generation 4440: reward = 230.81, steps = 1600\n",
      "13:22:41 [DEBUG] evaluate generation 4441: reward = 228.00, steps = 1600\n",
      "13:22:55 [DEBUG] evaluate generation 4442: reward = 236.46, steps = 1600\n",
      "13:23:10 [DEBUG] evaluate generation 4443: reward = 223.75, steps = 1600\n",
      "13:23:24 [DEBUG] evaluate generation 4444: reward = 233.94, steps = 1600\n",
      "13:23:36 [DEBUG] evaluate generation 4445: reward = 233.67, steps = 1600\n",
      "13:23:50 [DEBUG] evaluate generation 4446: reward = 245.07, steps = 1600\n",
      "13:24:01 [DEBUG] evaluate generation 4447: reward = 233.02, steps = 1600\n",
      "13:24:14 [DEBUG] evaluate generation 4448: reward = 231.62, steps = 1600\n",
      "13:24:24 [DEBUG] evaluate generation 4449: reward = 236.84, steps = 1600\n",
      "13:24:39 [DEBUG] evaluate generation 4450: reward = 245.24, steps = 1600\n",
      "13:24:52 [DEBUG] evaluate generation 4451: reward = 243.33, steps = 1600\n",
      "13:25:05 [DEBUG] evaluate generation 4452: reward = 237.52, steps = 1600\n",
      "13:25:18 [DEBUG] evaluate generation 4453: reward = 243.44, steps = 1600\n",
      "13:25:32 [DEBUG] evaluate generation 4454: reward = 234.34, steps = 1600\n",
      "13:25:44 [DEBUG] evaluate generation 4455: reward = 247.60, steps = 1600\n",
      "13:25:56 [DEBUG] evaluate generation 4456: reward = 240.82, steps = 1600\n",
      "13:26:07 [DEBUG] evaluate generation 4457: reward = 223.89, steps = 1600\n",
      "13:26:21 [DEBUG] evaluate generation 4458: reward = 241.42, steps = 1600\n",
      "13:26:35 [DEBUG] evaluate generation 4459: reward = 236.49, steps = 1600\n",
      "13:26:48 [DEBUG] evaluate generation 4460: reward = 240.85, steps = 1600\n",
      "13:27:01 [DEBUG] evaluate generation 4461: reward = 230.44, steps = 1600\n",
      "13:27:13 [DEBUG] evaluate generation 4462: reward = 221.61, steps = 1600\n",
      "13:27:25 [DEBUG] evaluate generation 4463: reward = 224.69, steps = 1600\n",
      "13:27:39 [DEBUG] evaluate generation 4464: reward = 232.59, steps = 1600\n",
      "13:27:50 [DEBUG] evaluate generation 4465: reward = 226.27, steps = 1600\n",
      "13:28:03 [DEBUG] evaluate generation 4466: reward = 222.77, steps = 1600\n",
      "13:28:17 [DEBUG] evaluate generation 4467: reward = 214.66, steps = 1600\n",
      "13:28:31 [DEBUG] evaluate generation 4468: reward = 221.49, steps = 1600\n",
      "13:28:43 [DEBUG] evaluate generation 4469: reward = 215.32, steps = 1600\n",
      "13:28:58 [DEBUG] evaluate generation 4470: reward = 219.31, steps = 1600\n",
      "13:29:12 [DEBUG] evaluate generation 4471: reward = 224.39, steps = 1600\n",
      "13:29:22 [DEBUG] evaluate generation 4472: reward = 224.04, steps = 1600\n",
      "13:29:34 [DEBUG] evaluate generation 4473: reward = 221.57, steps = 1600\n",
      "13:29:46 [DEBUG] evaluate generation 4474: reward = 223.69, steps = 1600\n",
      "13:29:56 [DEBUG] evaluate generation 4475: reward = 216.79, steps = 1600\n",
      "13:30:10 [DEBUG] evaluate generation 4476: reward = 232.21, steps = 1600\n",
      "13:30:25 [DEBUG] evaluate generation 4477: reward = 233.53, steps = 1600\n",
      "13:30:36 [DEBUG] evaluate generation 4478: reward = 220.21, steps = 1600\n",
      "13:30:49 [DEBUG] evaluate generation 4479: reward = 225.31, steps = 1600\n",
      "13:31:00 [DEBUG] evaluate generation 4480: reward = 218.44, steps = 1600\n",
      "13:31:11 [DEBUG] evaluate generation 4481: reward = 232.52, steps = 1600\n",
      "13:31:22 [DEBUG] evaluate generation 4482: reward = 228.69, steps = 1600\n",
      "13:31:36 [DEBUG] evaluate generation 4483: reward = 245.61, steps = 1600\n",
      "13:31:47 [DEBUG] evaluate generation 4484: reward = 234.82, steps = 1600\n",
      "13:32:00 [DEBUG] evaluate generation 4485: reward = 240.55, steps = 1600\n",
      "13:32:14 [DEBUG] evaluate generation 4486: reward = 241.04, steps = 1600\n",
      "13:32:25 [DEBUG] evaluate generation 4487: reward = 252.39, steps = 1600\n",
      "13:32:39 [DEBUG] evaluate generation 4488: reward = 225.33, steps = 1600\n",
      "13:32:52 [DEBUG] evaluate generation 4489: reward = 241.96, steps = 1600\n",
      "13:33:03 [DEBUG] evaluate generation 4490: reward = 239.08, steps = 1600\n",
      "13:33:15 [DEBUG] evaluate generation 4491: reward = 238.25, steps = 1600\n",
      "13:33:30 [DEBUG] evaluate generation 4492: reward = 240.52, steps = 1600\n",
      "13:33:43 [DEBUG] evaluate generation 4493: reward = 247.82, steps = 1600\n",
      "13:33:57 [DEBUG] evaluate generation 4494: reward = 236.97, steps = 1600\n",
      "13:34:08 [DEBUG] evaluate generation 4495: reward = 245.59, steps = 1600\n",
      "13:34:19 [DEBUG] evaluate generation 4496: reward = 216.00, steps = 1600\n",
      "13:34:30 [DEBUG] evaluate generation 4497: reward = 230.08, steps = 1600\n",
      "13:34:43 [DEBUG] evaluate generation 4498: reward = 229.66, steps = 1600\n",
      "13:34:56 [DEBUG] evaluate generation 4499: reward = 223.38, steps = 1600\n",
      "13:35:07 [DEBUG] evaluate generation 4500: reward = 220.34, steps = 1600\n",
      "13:35:18 [DEBUG] evaluate generation 4501: reward = 210.00, steps = 1600\n",
      "13:35:31 [DEBUG] evaluate generation 4502: reward = 209.86, steps = 1600\n",
      "13:35:45 [DEBUG] evaluate generation 4503: reward = 215.62, steps = 1600\n",
      "13:35:57 [DEBUG] evaluate generation 4504: reward = 206.40, steps = 1600\n",
      "13:36:11 [DEBUG] evaluate generation 4505: reward = 210.31, steps = 1600\n",
      "13:36:24 [DEBUG] evaluate generation 4506: reward = 214.07, steps = 1600\n",
      "13:36:39 [DEBUG] evaluate generation 4507: reward = 217.26, steps = 1600\n",
      "13:36:54 [DEBUG] evaluate generation 4508: reward = 223.43, steps = 1600\n",
      "13:37:07 [DEBUG] evaluate generation 4509: reward = 219.78, steps = 1600\n",
      "13:37:20 [DEBUG] evaluate generation 4510: reward = 228.39, steps = 1600\n",
      "13:37:33 [DEBUG] evaluate generation 4511: reward = 237.88, steps = 1600\n",
      "13:37:44 [DEBUG] evaluate generation 4512: reward = 225.37, steps = 1600\n",
      "13:37:58 [DEBUG] evaluate generation 4513: reward = 235.32, steps = 1600\n",
      "13:38:11 [DEBUG] evaluate generation 4514: reward = 237.45, steps = 1600\n",
      "13:38:26 [DEBUG] evaluate generation 4515: reward = 232.60, steps = 1600\n",
      "13:38:38 [DEBUG] evaluate generation 4516: reward = 231.43, steps = 1600\n",
      "13:38:53 [DEBUG] evaluate generation 4517: reward = 238.63, steps = 1600\n",
      "13:39:04 [DEBUG] evaluate generation 4518: reward = 242.21, steps = 1600\n",
      "13:39:17 [DEBUG] evaluate generation 4519: reward = 238.77, steps = 1600\n",
      "13:39:31 [DEBUG] evaluate generation 4520: reward = 242.57, steps = 1600\n",
      "13:39:44 [DEBUG] evaluate generation 4521: reward = 234.99, steps = 1600\n",
      "13:39:59 [DEBUG] evaluate generation 4522: reward = 243.17, steps = 1600\n",
      "13:40:12 [DEBUG] evaluate generation 4523: reward = 247.31, steps = 1600\n",
      "13:40:26 [DEBUG] evaluate generation 4524: reward = 256.40, steps = 1600\n",
      "13:40:38 [DEBUG] evaluate generation 4525: reward = 256.54, steps = 1600\n",
      "13:40:50 [DEBUG] evaluate generation 4526: reward = 257.89, steps = 1600\n",
      "13:41:04 [DEBUG] evaluate generation 4527: reward = 242.86, steps = 1600\n",
      "13:41:17 [DEBUG] evaluate generation 4528: reward = 238.92, steps = 1600\n",
      "13:41:29 [DEBUG] evaluate generation 4529: reward = 254.08, steps = 1600\n",
      "13:41:41 [DEBUG] evaluate generation 4530: reward = 237.53, steps = 1600\n",
      "13:41:55 [DEBUG] evaluate generation 4531: reward = 250.48, steps = 1600\n",
      "13:42:07 [DEBUG] evaluate generation 4532: reward = 240.53, steps = 1600\n",
      "13:42:22 [DEBUG] evaluate generation 4533: reward = 239.27, steps = 1600\n",
      "13:42:35 [DEBUG] evaluate generation 4534: reward = 239.03, steps = 1600\n",
      "13:42:48 [DEBUG] evaluate generation 4535: reward = 232.68, steps = 1600\n",
      "13:43:01 [DEBUG] evaluate generation 4536: reward = 244.91, steps = 1600\n",
      "13:43:13 [DEBUG] evaluate generation 4537: reward = 245.13, steps = 1600\n",
      "13:43:28 [DEBUG] evaluate generation 4538: reward = 252.17, steps = 1600\n",
      "13:43:41 [DEBUG] evaluate generation 4539: reward = 252.01, steps = 1600\n",
      "13:43:51 [DEBUG] evaluate generation 4540: reward = 249.36, steps = 1600\n",
      "13:44:05 [DEBUG] evaluate generation 4541: reward = 251.22, steps = 1600\n",
      "13:44:18 [DEBUG] evaluate generation 4542: reward = 256.45, steps = 1600\n",
      "13:44:30 [DEBUG] evaluate generation 4543: reward = 253.98, steps = 1600\n",
      "13:44:43 [DEBUG] evaluate generation 4544: reward = 251.64, steps = 1600\n",
      "13:44:56 [DEBUG] evaluate generation 4545: reward = 253.02, steps = 1600\n",
      "13:45:07 [DEBUG] evaluate generation 4546: reward = 262.20, steps = 1600\n",
      "13:45:19 [DEBUG] evaluate generation 4547: reward = 254.42, steps = 1600\n",
      "13:45:32 [DEBUG] evaluate generation 4548: reward = 246.77, steps = 1600\n",
      "13:45:44 [DEBUG] evaluate generation 4549: reward = 240.39, steps = 1600\n",
      "13:45:57 [DEBUG] evaluate generation 4550: reward = 237.26, steps = 1600\n",
      "13:46:10 [DEBUG] evaluate generation 4551: reward = 233.29, steps = 1600\n",
      "13:46:25 [DEBUG] evaluate generation 4552: reward = 244.59, steps = 1600\n",
      "13:46:40 [DEBUG] evaluate generation 4553: reward = 228.83, steps = 1600\n",
      "13:46:54 [DEBUG] evaluate generation 4554: reward = 255.18, steps = 1600\n",
      "13:47:07 [DEBUG] evaluate generation 4555: reward = 253.11, steps = 1600\n",
      "13:47:19 [DEBUG] evaluate generation 4556: reward = 241.69, steps = 1600\n",
      "13:47:33 [DEBUG] evaluate generation 4557: reward = 244.62, steps = 1600\n",
      "13:47:46 [DEBUG] evaluate generation 4558: reward = 257.01, steps = 1600\n",
      "13:47:58 [DEBUG] evaluate generation 4559: reward = 250.45, steps = 1600\n",
      "13:48:12 [DEBUG] evaluate generation 4560: reward = 259.08, steps = 1600\n",
      "13:48:26 [DEBUG] evaluate generation 4561: reward = 251.73, steps = 1600\n",
      "13:48:39 [DEBUG] evaluate generation 4562: reward = 239.42, steps = 1600\n",
      "13:48:51 [DEBUG] evaluate generation 4563: reward = 241.97, steps = 1600\n",
      "13:49:03 [DEBUG] evaluate generation 4564: reward = 245.10, steps = 1600\n",
      "13:49:19 [DEBUG] evaluate generation 4565: reward = 247.37, steps = 1600\n",
      "13:49:32 [DEBUG] evaluate generation 4566: reward = 245.96, steps = 1600\n",
      "13:49:44 [DEBUG] evaluate generation 4567: reward = 234.96, steps = 1600\n",
      "13:49:58 [DEBUG] evaluate generation 4568: reward = 242.43, steps = 1600\n",
      "13:50:12 [DEBUG] evaluate generation 4569: reward = -64.37, steps = 215\n",
      "13:50:24 [DEBUG] evaluate generation 4570: reward = 253.25, steps = 1600\n",
      "13:50:36 [DEBUG] evaluate generation 4571: reward = 250.48, steps = 1600\n",
      "13:50:48 [DEBUG] evaluate generation 4572: reward = 266.65, steps = 1600\n",
      "13:51:01 [DEBUG] evaluate generation 4573: reward = 252.74, steps = 1600\n",
      "13:51:11 [DEBUG] evaluate generation 4574: reward = 249.67, steps = 1600\n",
      "13:51:23 [DEBUG] evaluate generation 4575: reward = 253.69, steps = 1600\n",
      "13:51:35 [DEBUG] evaluate generation 4576: reward = 247.23, steps = 1600\n",
      "13:51:47 [DEBUG] evaluate generation 4577: reward = 244.40, steps = 1600\n",
      "13:51:58 [DEBUG] evaluate generation 4578: reward = 246.07, steps = 1600\n",
      "13:52:13 [DEBUG] evaluate generation 4579: reward = 236.29, steps = 1600\n",
      "13:52:27 [DEBUG] evaluate generation 4580: reward = 246.08, steps = 1600\n",
      "13:52:39 [DEBUG] evaluate generation 4581: reward = 246.23, steps = 1600\n",
      "13:52:53 [DEBUG] evaluate generation 4582: reward = 246.02, steps = 1600\n",
      "13:53:07 [DEBUG] evaluate generation 4583: reward = 246.23, steps = 1600\n",
      "13:53:21 [DEBUG] evaluate generation 4584: reward = 253.48, steps = 1600\n",
      "13:53:34 [DEBUG] evaluate generation 4585: reward = 262.23, steps = 1600\n",
      "13:53:45 [DEBUG] evaluate generation 4586: reward = 247.97, steps = 1600\n",
      "13:54:00 [DEBUG] evaluate generation 4587: reward = 271.70, steps = 1600\n",
      "13:54:13 [DEBUG] evaluate generation 4588: reward = 262.47, steps = 1600\n",
      "13:54:25 [DEBUG] evaluate generation 4589: reward = 264.95, steps = 1600\n",
      "13:54:41 [DEBUG] evaluate generation 4590: reward = 291.69, steps = 1550\n",
      "13:54:51 [DEBUG] evaluate generation 4591: reward = 261.34, steps = 1600\n",
      "13:55:05 [DEBUG] evaluate generation 4592: reward = 265.41, steps = 1600\n",
      "13:55:18 [DEBUG] evaluate generation 4593: reward = 250.58, steps = 1600\n",
      "13:55:32 [DEBUG] evaluate generation 4594: reward = 248.54, steps = 1600\n",
      "13:55:42 [DEBUG] evaluate generation 4595: reward = 248.46, steps = 1600\n",
      "13:55:55 [DEBUG] evaluate generation 4596: reward = 238.02, steps = 1600\n",
      "13:56:09 [DEBUG] evaluate generation 4597: reward = 246.29, steps = 1600\n",
      "13:56:22 [DEBUG] evaluate generation 4598: reward = 242.12, steps = 1600\n",
      "13:56:36 [DEBUG] evaluate generation 4599: reward = 248.02, steps = 1600\n",
      "13:56:51 [DEBUG] evaluate generation 4600: reward = 241.58, steps = 1600\n",
      "13:57:05 [DEBUG] evaluate generation 4601: reward = 253.40, steps = 1600\n",
      "13:57:21 [DEBUG] evaluate generation 4602: reward = 260.59, steps = 1600\n",
      "13:57:38 [DEBUG] evaluate generation 4603: reward = 262.89, steps = 1600\n",
      "13:57:51 [DEBUG] evaluate generation 4604: reward = 259.45, steps = 1600\n",
      "13:58:06 [DEBUG] evaluate generation 4605: reward = 269.37, steps = 1600\n",
      "13:58:19 [DEBUG] evaluate generation 4606: reward = 268.14, steps = 1600\n",
      "13:58:33 [DEBUG] evaluate generation 4607: reward = 264.29, steps = 1600\n",
      "13:58:44 [DEBUG] evaluate generation 4608: reward = 269.05, steps = 1600\n",
      "13:58:56 [DEBUG] evaluate generation 4609: reward = 249.59, steps = 1600\n",
      "13:59:09 [DEBUG] evaluate generation 4610: reward = 243.38, steps = 1600\n",
      "13:59:23 [DEBUG] evaluate generation 4611: reward = 260.47, steps = 1600\n",
      "13:59:36 [DEBUG] evaluate generation 4612: reward = 257.14, steps = 1600\n",
      "13:59:49 [DEBUG] evaluate generation 4613: reward = 266.36, steps = 1600\n",
      "14:00:03 [DEBUG] evaluate generation 4614: reward = 262.37, steps = 1600\n",
      "14:00:16 [DEBUG] evaluate generation 4615: reward = 274.32, steps = 1600\n",
      "14:00:28 [DEBUG] evaluate generation 4616: reward = 266.66, steps = 1600\n",
      "14:00:42 [DEBUG] evaluate generation 4617: reward = 266.89, steps = 1600\n",
      "14:00:54 [DEBUG] evaluate generation 4618: reward = 247.50, steps = 1600\n",
      "14:01:07 [DEBUG] evaluate generation 4619: reward = 251.98, steps = 1600\n",
      "14:01:22 [DEBUG] evaluate generation 4620: reward = 256.23, steps = 1600\n",
      "14:01:35 [DEBUG] evaluate generation 4621: reward = 261.09, steps = 1600\n",
      "14:01:48 [DEBUG] evaluate generation 4622: reward = 262.14, steps = 1600\n",
      "14:02:00 [DEBUG] evaluate generation 4623: reward = 140.65, steps = 1497\n",
      "14:02:13 [DEBUG] evaluate generation 4624: reward = 251.98, steps = 1600\n",
      "14:02:25 [DEBUG] evaluate generation 4625: reward = 249.72, steps = 1600\n",
      "14:02:39 [DEBUG] evaluate generation 4626: reward = 245.11, steps = 1600\n",
      "14:02:52 [DEBUG] evaluate generation 4627: reward = 255.90, steps = 1600\n",
      "14:03:07 [DEBUG] evaluate generation 4628: reward = 241.82, steps = 1600\n",
      "14:03:18 [DEBUG] evaluate generation 4629: reward = 257.22, steps = 1600\n",
      "14:03:31 [DEBUG] evaluate generation 4630: reward = 252.87, steps = 1600\n",
      "14:03:44 [DEBUG] evaluate generation 4631: reward = 257.52, steps = 1600\n",
      "14:03:57 [DEBUG] evaluate generation 4632: reward = 258.35, steps = 1600\n",
      "14:04:13 [DEBUG] evaluate generation 4633: reward = 251.04, steps = 1600\n",
      "14:04:28 [DEBUG] evaluate generation 4634: reward = 244.92, steps = 1600\n",
      "14:04:42 [DEBUG] evaluate generation 4635: reward = 259.48, steps = 1600\n",
      "14:04:56 [DEBUG] evaluate generation 4636: reward = 246.93, steps = 1600\n",
      "14:05:05 [DEBUG] evaluate generation 4637: reward = 251.50, steps = 1600\n",
      "14:05:20 [DEBUG] evaluate generation 4638: reward = 247.85, steps = 1600\n",
      "14:05:36 [DEBUG] evaluate generation 4639: reward = 265.48, steps = 1600\n",
      "14:05:51 [DEBUG] evaluate generation 4640: reward = 247.86, steps = 1600\n",
      "14:06:03 [DEBUG] evaluate generation 4641: reward = 256.94, steps = 1600\n",
      "14:06:18 [DEBUG] evaluate generation 4642: reward = 263.87, steps = 1600\n",
      "14:06:31 [DEBUG] evaluate generation 4643: reward = 258.01, steps = 1600\n",
      "14:06:43 [DEBUG] evaluate generation 4644: reward = 248.41, steps = 1600\n",
      "14:06:56 [DEBUG] evaluate generation 4645: reward = 247.10, steps = 1600\n",
      "14:07:09 [DEBUG] evaluate generation 4646: reward = 252.47, steps = 1600\n",
      "14:07:23 [DEBUG] evaluate generation 4647: reward = 251.49, steps = 1600\n",
      "14:07:37 [DEBUG] evaluate generation 4648: reward = 250.26, steps = 1600\n",
      "14:07:48 [DEBUG] evaluate generation 4649: reward = 248.46, steps = 1600\n",
      "14:08:00 [DEBUG] evaluate generation 4650: reward = 248.61, steps = 1600\n",
      "14:08:15 [DEBUG] evaluate generation 4651: reward = 250.68, steps = 1600\n",
      "14:08:29 [DEBUG] evaluate generation 4652: reward = 250.53, steps = 1600\n",
      "14:08:42 [DEBUG] evaluate generation 4653: reward = 242.37, steps = 1600\n",
      "14:08:55 [DEBUG] evaluate generation 4654: reward = 240.22, steps = 1600\n",
      "14:09:09 [DEBUG] evaluate generation 4655: reward = 239.06, steps = 1600\n",
      "14:09:24 [DEBUG] evaluate generation 4656: reward = 247.35, steps = 1600\n",
      "14:09:38 [DEBUG] evaluate generation 4657: reward = 245.32, steps = 1600\n",
      "14:09:52 [DEBUG] evaluate generation 4658: reward = 264.31, steps = 1600\n",
      "14:10:05 [DEBUG] evaluate generation 4659: reward = 244.91, steps = 1600\n",
      "14:10:19 [DEBUG] evaluate generation 4660: reward = 261.84, steps = 1600\n",
      "14:10:33 [DEBUG] evaluate generation 4661: reward = 263.47, steps = 1600\n",
      "14:10:47 [DEBUG] evaluate generation 4662: reward = 267.79, steps = 1600\n",
      "14:11:01 [DEBUG] evaluate generation 4663: reward = 256.50, steps = 1600\n",
      "14:11:13 [DEBUG] evaluate generation 4664: reward = 253.98, steps = 1600\n",
      "14:11:23 [DEBUG] evaluate generation 4665: reward = 242.80, steps = 1600\n",
      "14:11:40 [DEBUG] evaluate generation 4666: reward = 245.79, steps = 1600\n",
      "14:11:54 [DEBUG] evaluate generation 4667: reward = 266.93, steps = 1600\n",
      "14:12:05 [DEBUG] evaluate generation 4668: reward = 254.35, steps = 1600\n",
      "14:12:21 [DEBUG] evaluate generation 4669: reward = 258.25, steps = 1600\n",
      "14:12:36 [DEBUG] evaluate generation 4670: reward = 250.44, steps = 1600\n",
      "14:12:48 [DEBUG] evaluate generation 4671: reward = 261.45, steps = 1600\n",
      "14:13:03 [DEBUG] evaluate generation 4672: reward = 277.91, steps = 1600\n",
      "14:13:15 [DEBUG] evaluate generation 4673: reward = 270.89, steps = 1600\n",
      "14:13:28 [DEBUG] evaluate generation 4674: reward = 257.75, steps = 1600\n",
      "14:13:41 [DEBUG] evaluate generation 4675: reward = 263.87, steps = 1600\n",
      "14:13:52 [DEBUG] evaluate generation 4676: reward = 241.48, steps = 1600\n",
      "14:14:05 [DEBUG] evaluate generation 4677: reward = 261.26, steps = 1600\n",
      "14:14:19 [DEBUG] evaluate generation 4678: reward = 249.82, steps = 1600\n",
      "14:14:32 [DEBUG] evaluate generation 4679: reward = 250.27, steps = 1600\n",
      "14:14:45 [DEBUG] evaluate generation 4680: reward = 255.83, steps = 1600\n",
      "14:14:59 [DEBUG] evaluate generation 4681: reward = 253.48, steps = 1600\n",
      "14:15:12 [DEBUG] evaluate generation 4682: reward = 265.47, steps = 1600\n",
      "14:15:23 [DEBUG] evaluate generation 4683: reward = 240.86, steps = 1600\n",
      "14:15:39 [DEBUG] evaluate generation 4684: reward = 256.08, steps = 1600\n",
      "14:15:52 [DEBUG] evaluate generation 4685: reward = 258.97, steps = 1600\n",
      "14:16:04 [DEBUG] evaluate generation 4686: reward = 252.50, steps = 1600\n",
      "14:16:15 [DEBUG] evaluate generation 4687: reward = 272.64, steps = 1600\n",
      "14:16:28 [DEBUG] evaluate generation 4688: reward = 250.09, steps = 1600\n",
      "14:16:42 [DEBUG] evaluate generation 4689: reward = 250.79, steps = 1600\n",
      "14:16:56 [DEBUG] evaluate generation 4690: reward = 231.15, steps = 1600\n",
      "14:17:08 [DEBUG] evaluate generation 4691: reward = 252.14, steps = 1600\n",
      "14:17:21 [DEBUG] evaluate generation 4692: reward = 258.51, steps = 1600\n",
      "14:17:35 [DEBUG] evaluate generation 4693: reward = 269.79, steps = 1600\n",
      "14:17:47 [DEBUG] evaluate generation 4694: reward = 241.53, steps = 1600\n",
      "14:18:01 [DEBUG] evaluate generation 4695: reward = 272.69, steps = 1600\n",
      "14:18:16 [DEBUG] evaluate generation 4696: reward = 272.00, steps = 1600\n",
      "14:18:29 [DEBUG] evaluate generation 4697: reward = 267.76, steps = 1600\n",
      "14:18:42 [DEBUG] evaluate generation 4698: reward = 273.90, steps = 1600\n",
      "14:18:57 [DEBUG] evaluate generation 4699: reward = 290.67, steps = 1589\n",
      "14:19:09 [DEBUG] evaluate generation 4700: reward = 282.97, steps = 1600\n",
      "14:19:22 [DEBUG] evaluate generation 4701: reward = 259.35, steps = 1600\n",
      "14:19:36 [DEBUG] evaluate generation 4702: reward = 276.44, steps = 1600\n",
      "14:19:50 [DEBUG] evaluate generation 4703: reward = 269.27, steps = 1600\n",
      "14:20:04 [DEBUG] evaluate generation 4704: reward = 282.81, steps = 1600\n",
      "14:20:16 [DEBUG] evaluate generation 4705: reward = 283.23, steps = 1600\n",
      "14:20:32 [DEBUG] evaluate generation 4706: reward = 291.46, steps = 1592\n",
      "14:20:46 [DEBUG] evaluate generation 4707: reward = 293.20, steps = 1565\n",
      "14:20:57 [DEBUG] evaluate generation 4708: reward = 287.19, steps = 1600\n",
      "14:21:10 [DEBUG] evaluate generation 4709: reward = 290.98, steps = 1600\n",
      "14:21:21 [DEBUG] evaluate generation 4710: reward = 291.31, steps = 1588\n",
      "14:21:33 [DEBUG] evaluate generation 4711: reward = 278.97, steps = 1600\n",
      "14:21:46 [DEBUG] evaluate generation 4712: reward = 274.01, steps = 1600\n",
      "14:22:00 [DEBUG] evaluate generation 4713: reward = 273.79, steps = 1600\n",
      "14:22:13 [DEBUG] evaluate generation 4714: reward = 270.12, steps = 1600\n",
      "14:22:27 [DEBUG] evaluate generation 4715: reward = 267.46, steps = 1600\n",
      "14:22:40 [DEBUG] evaluate generation 4716: reward = 285.29, steps = 1600\n",
      "14:22:50 [DEBUG] evaluate generation 4717: reward = 291.96, steps = 1594\n",
      "14:23:03 [DEBUG] evaluate generation 4718: reward = 287.68, steps = 1600\n",
      "14:23:13 [DEBUG] evaluate generation 4719: reward = 283.02, steps = 1600\n",
      "14:23:26 [DEBUG] evaluate generation 4720: reward = 276.95, steps = 1600\n",
      "14:23:40 [DEBUG] evaluate generation 4721: reward = 278.01, steps = 1600\n",
      "14:23:53 [DEBUG] evaluate generation 4722: reward = 284.41, steps = 1600\n",
      "14:24:04 [DEBUG] evaluate generation 4723: reward = 273.85, steps = 1600\n",
      "14:24:17 [DEBUG] evaluate generation 4724: reward = 262.22, steps = 1600\n",
      "14:24:28 [DEBUG] evaluate generation 4725: reward = 280.18, steps = 1600\n",
      "14:24:41 [DEBUG] evaluate generation 4726: reward = 284.00, steps = 1600\n",
      "14:24:54 [DEBUG] evaluate generation 4727: reward = 262.38, steps = 1600\n",
      "14:25:06 [DEBUG] evaluate generation 4728: reward = 273.28, steps = 1600\n",
      "14:25:19 [DEBUG] evaluate generation 4729: reward = 274.21, steps = 1600\n",
      "14:25:33 [DEBUG] evaluate generation 4730: reward = 278.22, steps = 1600\n",
      "14:25:44 [DEBUG] evaluate generation 4731: reward = 270.13, steps = 1600\n",
      "14:25:54 [DEBUG] evaluate generation 4732: reward = 253.51, steps = 1600\n",
      "14:26:06 [DEBUG] evaluate generation 4733: reward = 257.90, steps = 1600\n",
      "14:26:18 [DEBUG] evaluate generation 4734: reward = 260.20, steps = 1600\n",
      "14:26:32 [DEBUG] evaluate generation 4735: reward = 251.56, steps = 1600\n",
      "14:26:46 [DEBUG] evaluate generation 4736: reward = 249.22, steps = 1600\n",
      "14:26:59 [DEBUG] evaluate generation 4737: reward = 254.41, steps = 1600\n",
      "14:27:10 [DEBUG] evaluate generation 4738: reward = 252.53, steps = 1600\n",
      "14:27:23 [DEBUG] evaluate generation 4739: reward = 258.62, steps = 1600\n",
      "14:27:37 [DEBUG] evaluate generation 4740: reward = 259.41, steps = 1600\n",
      "14:27:52 [DEBUG] evaluate generation 4741: reward = 280.33, steps = 1600\n",
      "14:28:04 [DEBUG] evaluate generation 4742: reward = 252.05, steps = 1600\n",
      "14:28:16 [DEBUG] evaluate generation 4743: reward = 255.45, steps = 1600\n",
      "14:28:28 [DEBUG] evaluate generation 4744: reward = 264.55, steps = 1600\n",
      "14:28:41 [DEBUG] evaluate generation 4745: reward = 269.40, steps = 1600\n",
      "14:28:53 [DEBUG] evaluate generation 4746: reward = 285.07, steps = 1600\n",
      "14:29:05 [DEBUG] evaluate generation 4747: reward = 283.24, steps = 1600\n",
      "14:29:19 [DEBUG] evaluate generation 4748: reward = 280.42, steps = 1600\n",
      "14:29:32 [DEBUG] evaluate generation 4749: reward = 279.08, steps = 1600\n",
      "14:29:44 [DEBUG] evaluate generation 4750: reward = 277.05, steps = 1600\n",
      "14:29:56 [DEBUG] evaluate generation 4751: reward = 271.18, steps = 1600\n",
      "14:30:09 [DEBUG] evaluate generation 4752: reward = 264.71, steps = 1600\n",
      "14:30:19 [DEBUG] evaluate generation 4753: reward = 257.50, steps = 1600\n",
      "14:30:31 [DEBUG] evaluate generation 4754: reward = 271.71, steps = 1600\n",
      "14:30:46 [DEBUG] evaluate generation 4755: reward = 264.14, steps = 1600\n",
      "14:30:57 [DEBUG] evaluate generation 4756: reward = 278.05, steps = 1600\n",
      "14:31:09 [DEBUG] evaluate generation 4757: reward = 264.08, steps = 1600\n",
      "14:31:24 [DEBUG] evaluate generation 4758: reward = 263.40, steps = 1600\n",
      "14:31:36 [DEBUG] evaluate generation 4759: reward = 288.61, steps = 1600\n",
      "14:31:46 [DEBUG] evaluate generation 4760: reward = 265.51, steps = 1600\n",
      "14:31:58 [DEBUG] evaluate generation 4761: reward = 271.93, steps = 1600\n",
      "14:32:09 [DEBUG] evaluate generation 4762: reward = 262.39, steps = 1600\n",
      "14:32:22 [DEBUG] evaluate generation 4763: reward = 260.40, steps = 1600\n",
      "14:32:33 [DEBUG] evaluate generation 4764: reward = 261.21, steps = 1600\n",
      "14:32:47 [DEBUG] evaluate generation 4765: reward = 250.90, steps = 1600\n",
      "14:33:02 [DEBUG] evaluate generation 4766: reward = 244.59, steps = 1600\n",
      "14:33:15 [DEBUG] evaluate generation 4767: reward = 269.15, steps = 1600\n",
      "14:33:28 [DEBUG] evaluate generation 4768: reward = 286.02, steps = 1600\n",
      "14:33:40 [DEBUG] evaluate generation 4769: reward = 292.67, steps = 1586\n",
      "14:33:51 [DEBUG] evaluate generation 4770: reward = 285.32, steps = 1600\n",
      "14:34:02 [DEBUG] evaluate generation 4771: reward = 266.87, steps = 1600\n",
      "14:34:17 [DEBUG] evaluate generation 4772: reward = 276.07, steps = 1600\n",
      "14:34:29 [DEBUG] evaluate generation 4773: reward = 256.99, steps = 1600\n",
      "14:34:41 [DEBUG] evaluate generation 4774: reward = 264.29, steps = 1600\n",
      "14:34:55 [DEBUG] evaluate generation 4775: reward = 277.65, steps = 1600\n",
      "14:35:07 [DEBUG] evaluate generation 4776: reward = 276.92, steps = 1600\n",
      "14:35:20 [DEBUG] evaluate generation 4777: reward = 268.89, steps = 1600\n",
      "14:35:32 [DEBUG] evaluate generation 4778: reward = 273.71, steps = 1600\n",
      "14:35:48 [DEBUG] evaluate generation 4779: reward = 286.33, steps = 1600\n",
      "14:35:59 [DEBUG] evaluate generation 4780: reward = 252.55, steps = 1600\n",
      "14:36:14 [DEBUG] evaluate generation 4781: reward = 289.51, steps = 1600\n",
      "14:36:27 [DEBUG] evaluate generation 4782: reward = 286.54, steps = 1600\n",
      "14:36:41 [DEBUG] evaluate generation 4783: reward = 289.32, steps = 1600\n",
      "14:36:56 [DEBUG] evaluate generation 4784: reward = 293.01, steps = 1522\n",
      "14:37:10 [DEBUG] evaluate generation 4785: reward = 291.52, steps = 1573\n",
      "14:37:24 [DEBUG] evaluate generation 4786: reward = 278.66, steps = 1600\n",
      "14:37:36 [DEBUG] evaluate generation 4787: reward = 288.13, steps = 1600\n",
      "14:37:47 [DEBUG] evaluate generation 4788: reward = 281.42, steps = 1600\n",
      "14:38:00 [DEBUG] evaluate generation 4789: reward = 288.39, steps = 1600\n",
      "14:38:14 [DEBUG] evaluate generation 4790: reward = 274.70, steps = 1600\n",
      "14:38:27 [DEBUG] evaluate generation 4791: reward = 276.84, steps = 1600\n",
      "14:38:42 [DEBUG] evaluate generation 4792: reward = 291.77, steps = 1530\n",
      "14:38:52 [DEBUG] evaluate generation 4793: reward = 271.80, steps = 1600\n",
      "14:39:05 [DEBUG] evaluate generation 4794: reward = 290.16, steps = 1559\n",
      "14:39:17 [DEBUG] evaluate generation 4795: reward = 278.73, steps = 1600\n",
      "14:39:29 [DEBUG] evaluate generation 4796: reward = 287.69, steps = 1582\n",
      "14:39:44 [DEBUG] evaluate generation 4797: reward = 274.62, steps = 1600\n",
      "14:39:55 [DEBUG] evaluate generation 4798: reward = 288.67, steps = 1578\n",
      "14:40:07 [DEBUG] evaluate generation 4799: reward = 279.08, steps = 1600\n",
      "14:40:18 [DEBUG] evaluate generation 4800: reward = 282.57, steps = 1600\n",
      "14:40:32 [DEBUG] evaluate generation 4801: reward = 290.98, steps = 1586\n",
      "14:40:47 [DEBUG] evaluate generation 4802: reward = 291.26, steps = 1537\n",
      "14:41:00 [DEBUG] evaluate generation 4803: reward = 290.64, steps = 1558\n",
      "14:41:11 [DEBUG] evaluate generation 4804: reward = 286.88, steps = 1600\n",
      "14:41:24 [DEBUG] evaluate generation 4805: reward = 289.01, steps = 1598\n",
      "14:41:38 [DEBUG] evaluate generation 4806: reward = 280.42, steps = 1600\n",
      "14:41:51 [DEBUG] evaluate generation 4807: reward = 290.87, steps = 1600\n",
      "14:42:05 [DEBUG] evaluate generation 4808: reward = 275.19, steps = 1600\n",
      "14:42:18 [DEBUG] evaluate generation 4809: reward = 285.22, steps = 1600\n",
      "14:42:32 [DEBUG] evaluate generation 4810: reward = 259.76, steps = 1600\n",
      "14:42:46 [DEBUG] evaluate generation 4811: reward = 273.21, steps = 1600\n",
      "14:43:00 [DEBUG] evaluate generation 4812: reward = 274.69, steps = 1600\n",
      "14:43:13 [DEBUG] evaluate generation 4813: reward = 265.80, steps = 1600\n",
      "14:43:25 [DEBUG] evaluate generation 4814: reward = 284.02, steps = 1600\n",
      "14:43:39 [DEBUG] evaluate generation 4815: reward = 270.34, steps = 1600\n",
      "14:43:53 [DEBUG] evaluate generation 4816: reward = 276.64, steps = 1600\n",
      "14:44:08 [DEBUG] evaluate generation 4817: reward = 283.27, steps = 1600\n",
      "14:44:21 [DEBUG] evaluate generation 4818: reward = 286.34, steps = 1600\n",
      "14:44:33 [DEBUG] evaluate generation 4819: reward = 264.07, steps = 1600\n",
      "14:44:47 [DEBUG] evaluate generation 4820: reward = 289.33, steps = 1595\n",
      "14:45:00 [DEBUG] evaluate generation 4821: reward = 277.33, steps = 1600\n",
      "14:45:12 [DEBUG] evaluate generation 4822: reward = 278.15, steps = 1600\n",
      "14:45:25 [DEBUG] evaluate generation 4823: reward = 289.95, steps = 1581\n",
      "14:45:39 [DEBUG] evaluate generation 4824: reward = 290.66, steps = 1578\n",
      "14:45:52 [DEBUG] evaluate generation 4825: reward = 291.52, steps = 1548\n",
      "14:46:03 [DEBUG] evaluate generation 4826: reward = 293.02, steps = 1526\n",
      "14:46:15 [DEBUG] evaluate generation 4827: reward = 285.80, steps = 1600\n",
      "14:46:27 [DEBUG] evaluate generation 4828: reward = 291.60, steps = 1530\n",
      "14:46:40 [DEBUG] evaluate generation 4829: reward = 292.40, steps = 1487\n",
      "14:46:55 [DEBUG] evaluate generation 4830: reward = 292.13, steps = 1515\n",
      "14:47:08 [DEBUG] evaluate generation 4831: reward = 295.33, steps = 1477\n",
      "14:47:20 [DEBUG] evaluate generation 4832: reward = 293.28, steps = 1515\n",
      "14:47:35 [DEBUG] evaluate generation 4833: reward = 295.00, steps = 1462\n",
      "14:47:47 [DEBUG] evaluate generation 4834: reward = 272.25, steps = 1600\n",
      "14:48:00 [DEBUG] evaluate generation 4835: reward = 292.10, steps = 1502\n",
      "14:48:15 [DEBUG] evaluate generation 4836: reward = 294.16, steps = 1496\n",
      "14:48:28 [DEBUG] evaluate generation 4837: reward = 291.94, steps = 1521\n",
      "14:48:37 [DEBUG] evaluate generation 4838: reward = 289.54, steps = 1579\n",
      "14:48:49 [DEBUG] evaluate generation 4839: reward = 293.02, steps = 1546\n",
      "14:49:01 [DEBUG] evaluate generation 4840: reward = 291.64, steps = 1551\n",
      "14:49:15 [DEBUG] evaluate generation 4841: reward = 290.29, steps = 1546\n",
      "14:49:28 [DEBUG] evaluate generation 4842: reward = 276.70, steps = 1600\n",
      "14:49:42 [DEBUG] evaluate generation 4843: reward = 290.77, steps = 1544\n",
      "14:49:58 [DEBUG] evaluate generation 4844: reward = 289.98, steps = 1555\n",
      "14:50:11 [DEBUG] evaluate generation 4845: reward = 290.62, steps = 1553\n",
      "14:50:24 [DEBUG] evaluate generation 4846: reward = 288.98, steps = 1535\n",
      "14:50:37 [DEBUG] evaluate generation 4847: reward = 293.58, steps = 1458\n",
      "14:50:50 [DEBUG] evaluate generation 4848: reward = 293.20, steps = 1486\n",
      "14:51:02 [DEBUG] evaluate generation 4849: reward = 288.32, steps = 1553\n",
      "14:51:17 [DEBUG] evaluate generation 4850: reward = 293.58, steps = 1462\n",
      "14:51:30 [DEBUG] evaluate generation 4851: reward = 290.89, steps = 1528\n",
      "14:51:43 [DEBUG] evaluate generation 4852: reward = 289.44, steps = 1531\n",
      "14:51:55 [DEBUG] evaluate generation 4853: reward = 290.63, steps = 1517\n",
      "14:52:06 [DEBUG] evaluate generation 4854: reward = 97.94, steps = 1091\n",
      "14:52:21 [DEBUG] evaluate generation 4855: reward = 290.71, steps = 1516\n",
      "14:52:35 [DEBUG] evaluate generation 4856: reward = 295.22, steps = 1449\n",
      "14:52:48 [DEBUG] evaluate generation 4857: reward = 292.56, steps = 1488\n",
      "14:52:58 [DEBUG] evaluate generation 4858: reward = 288.73, steps = 1538\n",
      "14:53:10 [DEBUG] evaluate generation 4859: reward = 291.92, steps = 1488\n",
      "14:53:20 [DEBUG] evaluate generation 4860: reward = 292.25, steps = 1486\n",
      "14:53:31 [DEBUG] evaluate generation 4861: reward = 288.31, steps = 1580\n",
      "14:53:44 [DEBUG] evaluate generation 4862: reward = 293.75, steps = 1490\n",
      "14:53:53 [DEBUG] evaluate generation 4863: reward = 294.72, steps = 1445\n",
      "14:54:08 [DEBUG] evaluate generation 4864: reward = 292.55, steps = 1487\n",
      "14:54:19 [DEBUG] evaluate generation 4865: reward = 288.88, steps = 1552\n",
      "14:54:31 [DEBUG] evaluate generation 4866: reward = 292.89, steps = 1483\n",
      "14:54:44 [DEBUG] evaluate generation 4867: reward = 289.85, steps = 1572\n",
      "14:54:59 [DEBUG] evaluate generation 4868: reward = 289.21, steps = 1560\n",
      "14:55:12 [DEBUG] evaluate generation 4869: reward = 290.32, steps = 1555\n",
      "14:55:25 [DEBUG] evaluate generation 4870: reward = 291.09, steps = 1543\n",
      "14:55:39 [DEBUG] evaluate generation 4871: reward = 292.07, steps = 1483\n",
      "14:55:50 [DEBUG] evaluate generation 4872: reward = 289.03, steps = 1554\n",
      "14:56:02 [DEBUG] evaluate generation 4873: reward = 291.21, steps = 1568\n",
      "14:56:15 [DEBUG] evaluate generation 4874: reward = 290.83, steps = 1542\n",
      "14:56:28 [DEBUG] evaluate generation 4875: reward = 291.17, steps = 1539\n",
      "14:56:40 [DEBUG] evaluate generation 4876: reward = 290.59, steps = 1557\n",
      "14:56:55 [DEBUG] evaluate generation 4877: reward = 284.33, steps = 1600\n",
      "14:57:09 [DEBUG] evaluate generation 4878: reward = 289.95, steps = 1570\n",
      "14:57:22 [DEBUG] evaluate generation 4879: reward = 290.98, steps = 1534\n",
      "14:57:34 [DEBUG] evaluate generation 4880: reward = 293.76, steps = 1494\n",
      "14:57:45 [DEBUG] evaluate generation 4881: reward = 292.88, steps = 1494\n",
      "14:57:58 [DEBUG] evaluate generation 4882: reward = 291.99, steps = 1490\n",
      "14:58:10 [DEBUG] evaluate generation 4883: reward = 293.81, steps = 1487\n",
      "14:58:23 [DEBUG] evaluate generation 4884: reward = 291.72, steps = 1521\n",
      "14:58:37 [DEBUG] evaluate generation 4885: reward = 287.30, steps = 1600\n",
      "14:58:48 [DEBUG] evaluate generation 4886: reward = 288.52, steps = 1562\n",
      "14:59:02 [DEBUG] evaluate generation 4887: reward = 288.09, steps = 1565\n",
      "14:59:14 [DEBUG] evaluate generation 4888: reward = 268.80, steps = 1600\n",
      "14:59:29 [DEBUG] evaluate generation 4889: reward = 274.96, steps = 1600\n",
      "14:59:43 [DEBUG] evaluate generation 4890: reward = 286.30, steps = 1600\n",
      "14:59:58 [DEBUG] evaluate generation 4891: reward = 285.56, steps = 1593\n",
      "15:00:11 [DEBUG] evaluate generation 4892: reward = 274.40, steps = 1600\n",
      "15:00:24 [DEBUG] evaluate generation 4893: reward = 287.91, steps = 1586\n",
      "15:00:37 [DEBUG] evaluate generation 4894: reward = 290.01, steps = 1554\n",
      "15:00:51 [DEBUG] evaluate generation 4895: reward = 289.84, steps = 1573\n",
      "15:01:04 [DEBUG] evaluate generation 4896: reward = 290.12, steps = 1525\n",
      "15:01:16 [DEBUG] evaluate generation 4897: reward = 290.72, steps = 1555\n",
      "15:01:28 [DEBUG] evaluate generation 4898: reward = 287.96, steps = 1575\n",
      "15:01:42 [DEBUG] evaluate generation 4899: reward = 287.39, steps = 1588\n",
      "15:01:57 [DEBUG] evaluate generation 4900: reward = 287.43, steps = 1568\n",
      "15:02:10 [DEBUG] evaluate generation 4901: reward = 289.80, steps = 1544\n",
      "15:02:24 [DEBUG] evaluate generation 4902: reward = 287.94, steps = 1569\n",
      "15:02:38 [DEBUG] evaluate generation 4903: reward = 287.60, steps = 1593\n",
      "15:02:53 [DEBUG] evaluate generation 4904: reward = 289.62, steps = 1571\n",
      "15:03:08 [DEBUG] evaluate generation 4905: reward = 285.56, steps = 1600\n",
      "15:03:22 [DEBUG] evaluate generation 4906: reward = 288.10, steps = 1556\n",
      "15:03:37 [DEBUG] evaluate generation 4907: reward = 292.19, steps = 1487\n",
      "15:03:52 [DEBUG] evaluate generation 4908: reward = 292.32, steps = 1469\n",
      "15:04:04 [DEBUG] evaluate generation 4909: reward = 292.23, steps = 1500\n",
      "15:04:17 [DEBUG] evaluate generation 4910: reward = 292.20, steps = 1488\n",
      "15:04:32 [DEBUG] evaluate generation 4911: reward = 294.51, steps = 1433\n",
      "15:04:41 [DEBUG] evaluate generation 4912: reward = 295.44, steps = 1414\n",
      "15:04:54 [DEBUG] evaluate generation 4913: reward = 293.21, steps = 1433\n",
      "15:05:07 [DEBUG] evaluate generation 4914: reward = 291.39, steps = 1491\n",
      "15:05:21 [DEBUG] evaluate generation 4915: reward = 291.50, steps = 1466\n",
      "15:05:32 [DEBUG] evaluate generation 4916: reward = 291.87, steps = 1457\n",
      "15:05:46 [DEBUG] evaluate generation 4917: reward = 292.67, steps = 1447\n",
      "15:06:00 [DEBUG] evaluate generation 4918: reward = 292.23, steps = 1432\n",
      "15:06:13 [DEBUG] evaluate generation 4919: reward = 291.44, steps = 1470\n",
      "15:06:26 [DEBUG] evaluate generation 4920: reward = 292.36, steps = 1480\n",
      "15:06:38 [DEBUG] evaluate generation 4921: reward = 292.10, steps = 1472\n",
      "15:06:50 [DEBUG] evaluate generation 4922: reward = 289.87, steps = 1537\n",
      "15:07:03 [DEBUG] evaluate generation 4923: reward = 290.71, steps = 1501\n",
      "15:07:14 [DEBUG] evaluate generation 4924: reward = 288.28, steps = 1529\n",
      "15:07:28 [DEBUG] evaluate generation 4925: reward = 291.05, steps = 1493\n",
      "15:07:40 [DEBUG] evaluate generation 4926: reward = 292.85, steps = 1478\n",
      "15:07:51 [DEBUG] evaluate generation 4927: reward = 290.34, steps = 1514\n",
      "15:08:03 [DEBUG] evaluate generation 4928: reward = 289.31, steps = 1512\n",
      "15:08:18 [DEBUG] evaluate generation 4929: reward = 290.01, steps = 1527\n",
      "15:08:31 [DEBUG] evaluate generation 4930: reward = 290.22, steps = 1523\n",
      "15:08:45 [DEBUG] evaluate generation 4931: reward = 287.81, steps = 1526\n",
      "15:08:56 [DEBUG] evaluate generation 4932: reward = 288.70, steps = 1525\n",
      "15:09:10 [DEBUG] evaluate generation 4933: reward = 288.67, steps = 1529\n",
      "15:09:23 [DEBUG] evaluate generation 4934: reward = 291.24, steps = 1474\n",
      "15:09:37 [DEBUG] evaluate generation 4935: reward = 290.24, steps = 1487\n",
      "15:09:50 [DEBUG] evaluate generation 4936: reward = 290.64, steps = 1504\n",
      "15:10:04 [DEBUG] evaluate generation 4937: reward = 290.54, steps = 1496\n",
      "15:10:16 [DEBUG] evaluate generation 4938: reward = 290.09, steps = 1510\n",
      "15:10:29 [DEBUG] evaluate generation 4939: reward = 291.82, steps = 1466\n",
      "15:10:43 [DEBUG] evaluate generation 4940: reward = 291.34, steps = 1491\n",
      "15:10:57 [DEBUG] evaluate generation 4941: reward = 290.48, steps = 1469\n",
      "15:11:12 [DEBUG] evaluate generation 4942: reward = 294.06, steps = 1415\n",
      "15:11:24 [DEBUG] evaluate generation 4943: reward = 289.17, steps = 1512\n",
      "15:11:39 [DEBUG] evaluate generation 4944: reward = 290.58, steps = 1480\n",
      "15:11:53 [DEBUG] evaluate generation 4945: reward = 289.91, steps = 1512\n",
      "15:12:06 [DEBUG] evaluate generation 4946: reward = 289.90, steps = 1475\n",
      "15:12:18 [DEBUG] evaluate generation 4947: reward = 292.48, steps = 1449\n",
      "15:12:32 [DEBUG] evaluate generation 4948: reward = 289.50, steps = 1482\n",
      "15:12:44 [DEBUG] evaluate generation 4949: reward = 289.13, steps = 1540\n",
      "15:12:56 [DEBUG] evaluate generation 4950: reward = 287.73, steps = 1542\n",
      "15:13:10 [DEBUG] evaluate generation 4951: reward = 290.96, steps = 1480\n",
      "15:13:24 [DEBUG] evaluate generation 4952: reward = 292.65, steps = 1463\n",
      "15:13:38 [DEBUG] evaluate generation 4953: reward = 290.65, steps = 1488\n",
      "15:13:50 [DEBUG] evaluate generation 4954: reward = 292.22, steps = 1485\n",
      "15:14:04 [DEBUG] evaluate generation 4955: reward = 292.52, steps = 1426\n",
      "15:14:18 [DEBUG] evaluate generation 4956: reward = 293.93, steps = 1420\n",
      "15:14:34 [DEBUG] evaluate generation 4957: reward = 292.35, steps = 1436\n",
      "15:14:46 [DEBUG] evaluate generation 4958: reward = 291.16, steps = 1500\n",
      "15:14:59 [DEBUG] evaluate generation 4959: reward = 291.55, steps = 1484\n",
      "15:15:11 [DEBUG] evaluate generation 4960: reward = 291.67, steps = 1464\n",
      "15:15:24 [DEBUG] evaluate generation 4961: reward = 4.82, steps = 647\n",
      "15:15:37 [DEBUG] evaluate generation 4962: reward = 290.71, steps = 1513\n",
      "15:15:50 [DEBUG] evaluate generation 4963: reward = 288.47, steps = 1543\n",
      "15:16:03 [DEBUG] evaluate generation 4964: reward = 287.25, steps = 1546\n",
      "15:16:16 [DEBUG] evaluate generation 4965: reward = 289.29, steps = 1526\n",
      "15:16:30 [DEBUG] evaluate generation 4966: reward = 289.55, steps = 1550\n",
      "15:16:43 [DEBUG] evaluate generation 4967: reward = 291.93, steps = 1480\n",
      "15:16:56 [DEBUG] evaluate generation 4968: reward = 291.19, steps = 1467\n",
      "15:17:08 [DEBUG] evaluate generation 4969: reward = 289.99, steps = 1493\n",
      "15:17:20 [DEBUG] evaluate generation 4970: reward = 286.90, steps = 1578\n",
      "15:17:33 [DEBUG] evaluate generation 4971: reward = 291.11, steps = 1471\n",
      "15:17:46 [DEBUG] evaluate generation 4972: reward = 288.97, steps = 1542\n",
      "15:17:58 [DEBUG] evaluate generation 4973: reward = 0.28, steps = 538\n",
      "15:18:12 [DEBUG] evaluate generation 4974: reward = 290.78, steps = 1493\n",
      "15:18:24 [DEBUG] evaluate generation 4975: reward = 293.47, steps = 1454\n",
      "15:18:37 [DEBUG] evaluate generation 4976: reward = 291.00, steps = 1485\n",
      "15:18:50 [DEBUG] evaluate generation 4977: reward = 292.36, steps = 1483\n",
      "15:19:05 [DEBUG] evaluate generation 4978: reward = 290.96, steps = 1490\n",
      "15:19:19 [DEBUG] evaluate generation 4979: reward = 292.67, steps = 1431\n",
      "15:19:33 [DEBUG] evaluate generation 4980: reward = 292.54, steps = 1459\n",
      "15:19:47 [DEBUG] evaluate generation 4981: reward = 292.06, steps = 1494\n",
      "15:20:00 [DEBUG] evaluate generation 4982: reward = 293.37, steps = 1465\n",
      "15:20:12 [DEBUG] evaluate generation 4983: reward = 292.89, steps = 1454\n",
      "15:20:27 [DEBUG] evaluate generation 4984: reward = 291.26, steps = 1471\n",
      "15:20:42 [DEBUG] evaluate generation 4985: reward = 148.94, steps = 1228\n",
      "15:20:54 [DEBUG] evaluate generation 4986: reward = 290.79, steps = 1482\n",
      "15:21:06 [DEBUG] evaluate generation 4987: reward = 292.69, steps = 1454\n",
      "15:21:16 [DEBUG] evaluate generation 4988: reward = 290.35, steps = 1508\n",
      "15:21:29 [DEBUG] evaluate generation 4989: reward = 290.55, steps = 1495\n",
      "15:21:42 [DEBUG] evaluate generation 4990: reward = 289.81, steps = 1537\n",
      "15:21:55 [DEBUG] evaluate generation 4991: reward = 289.14, steps = 1544\n",
      "15:22:09 [DEBUG] evaluate generation 4992: reward = 290.44, steps = 1510\n",
      "15:22:21 [DEBUG] evaluate generation 4993: reward = 290.39, steps = 1508\n",
      "15:22:34 [DEBUG] evaluate generation 4994: reward = 292.19, steps = 1459\n",
      "15:22:49 [DEBUG] evaluate generation 4995: reward = 291.19, steps = 1455\n",
      "15:23:02 [DEBUG] evaluate generation 4996: reward = 288.78, steps = 1544\n",
      "15:23:14 [DEBUG] evaluate generation 4997: reward = 291.18, steps = 1485\n",
      "15:23:28 [DEBUG] evaluate generation 4998: reward = 291.26, steps = 1482\n",
      "15:23:42 [DEBUG] evaluate generation 4999: reward = 290.52, steps = 1487\n",
      "15:23:55 [DEBUG] evaluate generation 5000: reward = 290.41, steps = 1518\n",
      "15:24:09 [DEBUG] evaluate generation 5001: reward = 290.25, steps = 1493\n",
      "15:24:22 [DEBUG] evaluate generation 5002: reward = 291.41, steps = 1467\n",
      "15:24:34 [DEBUG] evaluate generation 5003: reward = 288.57, steps = 1550\n",
      "15:24:48 [DEBUG] evaluate generation 5004: reward = 290.00, steps = 1506\n",
      "15:25:01 [DEBUG] evaluate generation 5005: reward = 289.72, steps = 1516\n",
      "15:25:14 [DEBUG] evaluate generation 5006: reward = 287.70, steps = 1568\n",
      "15:25:28 [DEBUG] evaluate generation 5007: reward = 288.40, steps = 1536\n",
      "15:25:42 [DEBUG] evaluate generation 5008: reward = 114.46, steps = 1131\n",
      "15:25:56 [DEBUG] evaluate generation 5009: reward = 293.34, steps = 1439\n",
      "15:26:10 [DEBUG] evaluate generation 5010: reward = 293.60, steps = 1454\n",
      "15:26:22 [DEBUG] evaluate generation 5011: reward = 290.76, steps = 1501\n",
      "15:26:36 [DEBUG] evaluate generation 5012: reward = 291.74, steps = 1470\n",
      "15:26:49 [DEBUG] evaluate generation 5013: reward = 293.23, steps = 1467\n",
      "15:27:01 [DEBUG] evaluate generation 5014: reward = 292.53, steps = 1452\n",
      "15:27:15 [DEBUG] evaluate generation 5015: reward = 292.26, steps = 1446\n",
      "15:27:28 [DEBUG] evaluate generation 5016: reward = 292.96, steps = 1452\n",
      "15:27:43 [DEBUG] evaluate generation 5017: reward = 294.73, steps = 1426\n",
      "15:27:56 [DEBUG] evaluate generation 5018: reward = 296.34, steps = 1405\n",
      "15:28:10 [DEBUG] evaluate generation 5019: reward = 292.30, steps = 1471\n",
      "15:28:23 [DEBUG] evaluate generation 5020: reward = 291.82, steps = 1452\n",
      "15:28:37 [DEBUG] evaluate generation 5021: reward = 289.03, steps = 1516\n",
      "15:28:52 [DEBUG] evaluate generation 5022: reward = 291.60, steps = 1475\n",
      "15:29:06 [DEBUG] evaluate generation 5023: reward = 291.68, steps = 1455\n",
      "15:29:19 [DEBUG] evaluate generation 5024: reward = 289.56, steps = 1489\n",
      "15:29:32 [DEBUG] evaluate generation 5025: reward = 289.21, steps = 1513\n",
      "15:29:44 [DEBUG] evaluate generation 5026: reward = 291.50, steps = 1490\n",
      "15:29:57 [DEBUG] evaluate generation 5027: reward = 289.01, steps = 1547\n",
      "15:30:11 [DEBUG] evaluate generation 5028: reward = 289.21, steps = 1553\n",
      "15:30:25 [DEBUG] evaluate generation 5029: reward = 289.33, steps = 1572\n",
      "15:30:37 [DEBUG] evaluate generation 5030: reward = 291.26, steps = 1489\n",
      "15:30:52 [DEBUG] evaluate generation 5031: reward = 290.55, steps = 1535\n",
      "15:31:07 [DEBUG] evaluate generation 5032: reward = 291.53, steps = 1469\n",
      "15:31:19 [DEBUG] evaluate generation 5033: reward = 292.13, steps = 1465\n",
      "15:31:32 [DEBUG] evaluate generation 5034: reward = 290.90, steps = 1502\n",
      "15:31:47 [DEBUG] evaluate generation 5035: reward = 289.38, steps = 1524\n",
      "15:32:00 [DEBUG] evaluate generation 5036: reward = 290.62, steps = 1505\n",
      "15:32:15 [DEBUG] evaluate generation 5037: reward = 291.72, steps = 1496\n",
      "15:32:30 [DEBUG] evaluate generation 5038: reward = 290.63, steps = 1479\n",
      "15:32:44 [DEBUG] evaluate generation 5039: reward = 290.66, steps = 1540\n",
      "15:32:56 [DEBUG] evaluate generation 5040: reward = 291.48, steps = 1489\n",
      "15:33:09 [DEBUG] evaluate generation 5041: reward = 290.97, steps = 1507\n",
      "15:33:22 [DEBUG] evaluate generation 5042: reward = 293.24, steps = 1462\n",
      "15:33:35 [DEBUG] evaluate generation 5043: reward = 290.26, steps = 1524\n",
      "15:33:48 [DEBUG] evaluate generation 5044: reward = 295.98, steps = 1409\n",
      "15:34:03 [DEBUG] evaluate generation 5045: reward = 294.06, steps = 1440\n",
      "15:34:16 [DEBUG] evaluate generation 5046: reward = 294.37, steps = 1409\n",
      "15:34:27 [DEBUG] evaluate generation 5047: reward = 292.59, steps = 1455\n",
      "15:34:39 [DEBUG] evaluate generation 5048: reward = 294.21, steps = 1441\n",
      "15:34:52 [DEBUG] evaluate generation 5049: reward = 292.02, steps = 1455\n",
      "15:35:04 [DEBUG] evaluate generation 5050: reward = 292.85, steps = 1461\n",
      "15:35:18 [DEBUG] evaluate generation 5051: reward = 290.10, steps = 1529\n",
      "15:35:32 [DEBUG] evaluate generation 5052: reward = 289.14, steps = 1533\n",
      "15:35:46 [DEBUG] evaluate generation 5053: reward = 292.38, steps = 1472\n",
      "15:35:59 [DEBUG] evaluate generation 5054: reward = 290.61, steps = 1509\n",
      "15:36:11 [DEBUG] evaluate generation 5055: reward = 290.91, steps = 1475\n",
      "15:36:25 [DEBUG] evaluate generation 5056: reward = 290.79, steps = 1475\n",
      "15:36:40 [DEBUG] evaluate generation 5057: reward = 291.56, steps = 1445\n",
      "15:36:54 [DEBUG] evaluate generation 5058: reward = 288.05, steps = 1520\n",
      "15:37:08 [DEBUG] evaluate generation 5059: reward = 293.45, steps = 1423\n",
      "15:37:22 [DEBUG] evaluate generation 5060: reward = 292.65, steps = 1433\n",
      "15:37:35 [DEBUG] evaluate generation 5061: reward = 291.58, steps = 1456\n",
      "15:37:47 [DEBUG] evaluate generation 5062: reward = 289.84, steps = 1482\n",
      "15:38:02 [DEBUG] evaluate generation 5063: reward = 290.08, steps = 1473\n",
      "15:38:15 [DEBUG] evaluate generation 5064: reward = 286.89, steps = 1513\n",
      "15:38:28 [DEBUG] evaluate generation 5065: reward = 291.40, steps = 1468\n",
      "15:38:41 [DEBUG] evaluate generation 5066: reward = 289.00, steps = 1499\n",
      "15:38:54 [DEBUG] evaluate generation 5067: reward = 286.39, steps = 1537\n",
      "15:39:08 [DEBUG] evaluate generation 5068: reward = 290.97, steps = 1444\n",
      "15:39:21 [DEBUG] evaluate generation 5069: reward = 291.53, steps = 1440\n",
      "15:39:34 [DEBUG] evaluate generation 5070: reward = 289.98, steps = 1463\n",
      "15:39:49 [DEBUG] evaluate generation 5071: reward = 289.46, steps = 1476\n",
      "15:40:05 [DEBUG] evaluate generation 5072: reward = 286.70, steps = 1509\n",
      "15:40:21 [DEBUG] evaluate generation 5073: reward = 287.42, steps = 1528\n",
      "15:40:36 [DEBUG] evaluate generation 5074: reward = 289.08, steps = 1494\n",
      "15:40:49 [DEBUG] evaluate generation 5075: reward = 288.40, steps = 1496\n",
      "15:41:03 [DEBUG] evaluate generation 5076: reward = 289.87, steps = 1479\n",
      "15:41:18 [DEBUG] evaluate generation 5077: reward = 290.98, steps = 1475\n",
      "15:41:32 [DEBUG] evaluate generation 5078: reward = 289.41, steps = 1486\n",
      "15:41:44 [DEBUG] evaluate generation 5079: reward = 289.44, steps = 1484\n",
      "15:41:59 [DEBUG] evaluate generation 5080: reward = 290.38, steps = 1456\n",
      "15:42:12 [DEBUG] evaluate generation 5081: reward = 288.42, steps = 1498\n",
      "15:42:25 [DEBUG] evaluate generation 5082: reward = 288.64, steps = 1506\n",
      "15:42:39 [DEBUG] evaluate generation 5083: reward = 287.27, steps = 1534\n",
      "15:42:53 [DEBUG] evaluate generation 5084: reward = 289.65, steps = 1489\n",
      "15:43:06 [DEBUG] evaluate generation 5085: reward = 290.86, steps = 1477\n",
      "15:43:19 [DEBUG] evaluate generation 5086: reward = 289.80, steps = 1473\n",
      "15:43:33 [DEBUG] evaluate generation 5087: reward = 290.59, steps = 1487\n",
      "15:43:48 [DEBUG] evaluate generation 5088: reward = 291.34, steps = 1454\n",
      "15:44:00 [DEBUG] evaluate generation 5089: reward = 288.85, steps = 1516\n",
      "15:44:14 [DEBUG] evaluate generation 5090: reward = 289.77, steps = 1505\n",
      "15:44:28 [DEBUG] evaluate generation 5091: reward = 289.28, steps = 1487\n",
      "15:44:42 [DEBUG] evaluate generation 5092: reward = 289.38, steps = 1493\n",
      "15:44:56 [DEBUG] evaluate generation 5093: reward = 291.69, steps = 1461\n",
      "15:45:09 [DEBUG] evaluate generation 5094: reward = 288.71, steps = 1498\n",
      "15:45:24 [DEBUG] evaluate generation 5095: reward = 290.76, steps = 1462\n",
      "15:45:37 [DEBUG] evaluate generation 5096: reward = 291.01, steps = 1452\n",
      "15:45:50 [DEBUG] evaluate generation 5097: reward = 290.35, steps = 1480\n",
      "15:46:05 [DEBUG] evaluate generation 5098: reward = 289.00, steps = 1485\n",
      "15:46:19 [DEBUG] evaluate generation 5099: reward = 291.49, steps = 1462\n",
      "15:46:32 [DEBUG] evaluate generation 5100: reward = 291.18, steps = 1460\n",
      "15:46:45 [DEBUG] evaluate generation 5101: reward = 288.24, steps = 1494\n",
      "15:46:59 [DEBUG] evaluate generation 5102: reward = 292.74, steps = 1402\n",
      "15:47:14 [DEBUG] evaluate generation 5103: reward = 292.24, steps = 1419\n",
      "15:47:27 [DEBUG] evaluate generation 5104: reward = 292.27, steps = 1396\n",
      "15:47:40 [DEBUG] evaluate generation 5105: reward = 291.01, steps = 1419\n",
      "15:47:55 [DEBUG] evaluate generation 5106: reward = 292.26, steps = 1430\n",
      "15:48:08 [DEBUG] evaluate generation 5107: reward = 292.65, steps = 1410\n",
      "15:48:24 [DEBUG] evaluate generation 5108: reward = 290.10, steps = 1435\n",
      "15:48:38 [DEBUG] evaluate generation 5109: reward = 290.66, steps = 1456\n",
      "15:48:50 [DEBUG] evaluate generation 5110: reward = 294.15, steps = 1389\n",
      "15:49:04 [DEBUG] evaluate generation 5111: reward = 293.20, steps = 1422\n",
      "15:49:17 [DEBUG] evaluate generation 5112: reward = 293.87, steps = 1389\n",
      "15:49:30 [DEBUG] evaluate generation 5113: reward = 295.19, steps = 1363\n",
      "15:49:42 [DEBUG] evaluate generation 5114: reward = 293.58, steps = 1392\n",
      "15:49:54 [DEBUG] evaluate generation 5115: reward = 291.03, steps = 1453\n",
      "15:50:06 [DEBUG] evaluate generation 5116: reward = 291.77, steps = 1414\n",
      "15:50:20 [DEBUG] evaluate generation 5117: reward = 290.76, steps = 1448\n",
      "15:50:34 [DEBUG] evaluate generation 5118: reward = 293.00, steps = 1428\n",
      "15:50:47 [DEBUG] evaluate generation 5119: reward = 292.28, steps = 1427\n",
      "15:50:59 [DEBUG] evaluate generation 5120: reward = 294.71, steps = 1414\n",
      "15:51:12 [DEBUG] evaluate generation 5121: reward = 291.51, steps = 1415\n",
      "15:51:24 [DEBUG] evaluate generation 5122: reward = 291.84, steps = 1430\n",
      "15:51:38 [DEBUG] evaluate generation 5123: reward = 289.75, steps = 1458\n",
      "15:51:53 [DEBUG] evaluate generation 5124: reward = 292.70, steps = 1411\n",
      "15:52:07 [DEBUG] evaluate generation 5125: reward = 292.99, steps = 1400\n",
      "15:52:19 [DEBUG] evaluate generation 5126: reward = 292.21, steps = 1430\n",
      "15:52:33 [DEBUG] evaluate generation 5127: reward = 291.05, steps = 1453\n",
      "15:52:47 [DEBUG] evaluate generation 5128: reward = 292.60, steps = 1413\n",
      "15:52:59 [DEBUG] evaluate generation 5129: reward = 289.84, steps = 1456\n",
      "15:53:13 [DEBUG] evaluate generation 5130: reward = 292.23, steps = 1434\n",
      "15:53:26 [DEBUG] evaluate generation 5131: reward = 294.20, steps = 1421\n",
      "15:53:37 [DEBUG] evaluate generation 5132: reward = 293.66, steps = 1418\n",
      "15:53:52 [DEBUG] evaluate generation 5133: reward = 291.15, steps = 1465\n",
      "15:54:06 [DEBUG] evaluate generation 5134: reward = 291.85, steps = 1470\n",
      "15:54:20 [DEBUG] evaluate generation 5135: reward = 293.83, steps = 1424\n",
      "15:54:33 [DEBUG] evaluate generation 5136: reward = 294.59, steps = 1402\n",
      "15:54:48 [DEBUG] evaluate generation 5137: reward = 291.78, steps = 1468\n",
      "15:55:01 [DEBUG] evaluate generation 5138: reward = 292.20, steps = 1447\n",
      "15:55:15 [DEBUG] evaluate generation 5139: reward = 291.70, steps = 1460\n",
      "15:55:29 [DEBUG] evaluate generation 5140: reward = 290.79, steps = 1474\n",
      "15:55:44 [DEBUG] evaluate generation 5141: reward = 289.30, steps = 1496\n",
      "15:55:58 [DEBUG] evaluate generation 5142: reward = 290.66, steps = 1441\n",
      "15:56:13 [DEBUG] evaluate generation 5143: reward = 291.22, steps = 1430\n",
      "15:56:25 [DEBUG] evaluate generation 5144: reward = 291.51, steps = 1420\n",
      "15:56:39 [DEBUG] evaluate generation 5145: reward = 289.03, steps = 1474\n",
      "15:56:55 [DEBUG] evaluate generation 5146: reward = 291.08, steps = 1470\n",
      "15:57:07 [DEBUG] evaluate generation 5147: reward = 288.29, steps = 1494\n",
      "15:57:21 [DEBUG] evaluate generation 5148: reward = 293.95, steps = 1402\n",
      "15:57:36 [DEBUG] evaluate generation 5149: reward = 289.71, steps = 1485\n",
      "15:57:51 [DEBUG] evaluate generation 5150: reward = 289.71, steps = 1464\n",
      "15:58:05 [DEBUG] evaluate generation 5151: reward = 294.64, steps = 1392\n",
      "15:58:20 [DEBUG] evaluate generation 5152: reward = 287.81, steps = 1509\n",
      "15:58:33 [DEBUG] evaluate generation 5153: reward = 290.49, steps = 1468\n",
      "15:58:47 [DEBUG] evaluate generation 5154: reward = 292.19, steps = 1427\n",
      "15:59:01 [DEBUG] evaluate generation 5155: reward = 292.53, steps = 1429\n",
      "15:59:15 [DEBUG] evaluate generation 5156: reward = 290.44, steps = 1490\n",
      "15:59:27 [DEBUG] evaluate generation 5157: reward = 292.58, steps = 1400\n",
      "15:59:41 [DEBUG] evaluate generation 5158: reward = 293.75, steps = 1386\n",
      "15:59:55 [DEBUG] evaluate generation 5159: reward = 292.75, steps = 1410\n",
      "16:00:10 [DEBUG] evaluate generation 5160: reward = 292.24, steps = 1430\n",
      "16:00:23 [DEBUG] evaluate generation 5161: reward = 291.87, steps = 1413\n",
      "16:00:38 [DEBUG] evaluate generation 5162: reward = 290.34, steps = 1448\n",
      "16:00:52 [DEBUG] evaluate generation 5163: reward = 293.58, steps = 1392\n",
      "16:01:06 [DEBUG] evaluate generation 5164: reward = 292.21, steps = 1438\n",
      "16:01:19 [DEBUG] evaluate generation 5165: reward = 294.93, steps = 1379\n",
      "16:01:31 [DEBUG] evaluate generation 5166: reward = 290.85, steps = 1446\n",
      "16:01:45 [DEBUG] evaluate generation 5167: reward = 291.57, steps = 1421\n",
      "16:01:58 [DEBUG] evaluate generation 5168: reward = 292.80, steps = 1409\n",
      "16:02:10 [DEBUG] evaluate generation 5169: reward = 293.00, steps = 1409\n",
      "16:02:23 [DEBUG] evaluate generation 5170: reward = 290.05, steps = 1445\n",
      "16:02:36 [DEBUG] evaluate generation 5171: reward = 293.16, steps = 1402\n",
      "16:02:49 [DEBUG] evaluate generation 5172: reward = 288.96, steps = 1492\n",
      "16:03:02 [DEBUG] evaluate generation 5173: reward = 289.87, steps = 1488\n",
      "16:03:17 [DEBUG] evaluate generation 5174: reward = 293.42, steps = 1423\n",
      "16:03:30 [DEBUG] evaluate generation 5175: reward = 293.52, steps = 1433\n",
      "16:03:43 [DEBUG] evaluate generation 5176: reward = 292.80, steps = 1417\n",
      "16:03:55 [DEBUG] evaluate generation 5177: reward = 291.24, steps = 1440\n",
      "16:04:08 [DEBUG] evaluate generation 5178: reward = 290.85, steps = 1487\n",
      "16:04:22 [DEBUG] evaluate generation 5179: reward = 291.52, steps = 1423\n",
      "16:04:35 [DEBUG] evaluate generation 5180: reward = 291.47, steps = 1438\n",
      "16:04:48 [DEBUG] evaluate generation 5181: reward = 290.99, steps = 1455\n",
      "16:05:02 [DEBUG] evaluate generation 5182: reward = 291.41, steps = 1457\n",
      "16:05:15 [DEBUG] evaluate generation 5183: reward = 292.15, steps = 1458\n",
      "16:05:28 [DEBUG] evaluate generation 5184: reward = 292.35, steps = 1442\n",
      "16:05:43 [DEBUG] evaluate generation 5185: reward = 290.56, steps = 1469\n",
      "16:05:58 [DEBUG] evaluate generation 5186: reward = 291.68, steps = 1480\n",
      "16:06:13 [DEBUG] evaluate generation 5187: reward = 294.67, steps = 1405\n",
      "16:06:28 [DEBUG] evaluate generation 5188: reward = 293.79, steps = 1435\n",
      "16:06:41 [DEBUG] evaluate generation 5189: reward = 290.91, steps = 1468\n",
      "16:06:54 [DEBUG] evaluate generation 5190: reward = 291.01, steps = 1480\n",
      "16:07:09 [DEBUG] evaluate generation 5191: reward = 291.54, steps = 1461\n",
      "16:07:24 [DEBUG] evaluate generation 5192: reward = 295.66, steps = 1394\n",
      "16:07:37 [DEBUG] evaluate generation 5193: reward = 295.54, steps = 1379\n",
      "16:07:51 [DEBUG] evaluate generation 5194: reward = 292.23, steps = 1449\n",
      "16:08:05 [DEBUG] evaluate generation 5195: reward = 292.54, steps = 1442\n",
      "16:08:20 [DEBUG] evaluate generation 5196: reward = 293.65, steps = 1385\n",
      "16:08:34 [DEBUG] evaluate generation 5197: reward = 292.24, steps = 1442\n",
      "16:08:49 [DEBUG] evaluate generation 5198: reward = 293.25, steps = 1421\n",
      "16:09:02 [DEBUG] evaluate generation 5199: reward = 295.65, steps = 1356\n",
      "16:09:15 [DEBUG] evaluate generation 5200: reward = 293.30, steps = 1431\n",
      "16:09:29 [DEBUG] evaluate generation 5201: reward = 294.39, steps = 1428\n",
      "16:09:40 [DEBUG] evaluate generation 5202: reward = 292.58, steps = 1426\n",
      "16:09:53 [DEBUG] evaluate generation 5203: reward = 292.38, steps = 1435\n",
      "16:10:06 [DEBUG] evaluate generation 5204: reward = 290.78, steps = 1468\n",
      "16:10:20 [DEBUG] evaluate generation 5205: reward = 291.70, steps = 1474\n",
      "16:10:33 [DEBUG] evaluate generation 5206: reward = 295.26, steps = 1367\n",
      "16:10:47 [DEBUG] evaluate generation 5207: reward = 292.95, steps = 1460\n",
      "16:11:02 [DEBUG] evaluate generation 5208: reward = 296.09, steps = 1369\n",
      "16:11:15 [DEBUG] evaluate generation 5209: reward = 292.86, steps = 1422\n",
      "16:11:29 [DEBUG] evaluate generation 5210: reward = 287.95, steps = 1529\n",
      "16:11:42 [DEBUG] evaluate generation 5211: reward = 293.12, steps = 1412\n",
      "16:11:56 [DEBUG] evaluate generation 5212: reward = 294.66, steps = 1372\n",
      "16:12:09 [DEBUG] evaluate generation 5213: reward = 294.74, steps = 1403\n",
      "16:12:23 [DEBUG] evaluate generation 5214: reward = 290.63, steps = 1424\n",
      "16:12:37 [DEBUG] evaluate generation 5215: reward = 293.08, steps = 1431\n",
      "16:12:51 [DEBUG] evaluate generation 5216: reward = 294.62, steps = 1401\n",
      "16:13:05 [DEBUG] evaluate generation 5217: reward = 291.18, steps = 1457\n",
      "16:13:19 [DEBUG] evaluate generation 5218: reward = 290.59, steps = 1452\n",
      "16:13:33 [DEBUG] evaluate generation 5219: reward = 295.02, steps = 1371\n",
      "16:13:48 [DEBUG] evaluate generation 5220: reward = 292.27, steps = 1424\n",
      "16:14:01 [DEBUG] evaluate generation 5221: reward = 291.16, steps = 1457\n",
      "16:14:16 [DEBUG] evaluate generation 5222: reward = 294.75, steps = 1356\n",
      "16:14:29 [DEBUG] evaluate generation 5223: reward = 289.39, steps = 1459\n",
      "16:14:44 [DEBUG] evaluate generation 5224: reward = 295.29, steps = 1355\n",
      "16:14:58 [DEBUG] evaluate generation 5225: reward = 290.25, steps = 1418\n",
      "16:15:11 [DEBUG] evaluate generation 5226: reward = 288.60, steps = 1497\n",
      "16:15:23 [DEBUG] evaluate generation 5227: reward = 290.68, steps = 1452\n",
      "16:15:38 [DEBUG] evaluate generation 5228: reward = 291.11, steps = 1411\n",
      "16:15:52 [DEBUG] evaluate generation 5229: reward = 293.40, steps = 1395\n",
      "16:16:05 [DEBUG] evaluate generation 5230: reward = 290.08, steps = 1439\n",
      "16:16:20 [DEBUG] evaluate generation 5231: reward = 292.59, steps = 1394\n",
      "16:16:35 [DEBUG] evaluate generation 5232: reward = 294.20, steps = 1392\n",
      "16:16:48 [DEBUG] evaluate generation 5233: reward = 289.73, steps = 1430\n",
      "16:17:03 [DEBUG] evaluate generation 5234: reward = 291.29, steps = 1435\n",
      "16:17:16 [DEBUG] evaluate generation 5235: reward = 292.38, steps = 1409\n",
      "16:17:31 [DEBUG] evaluate generation 5236: reward = 292.69, steps = 1410\n",
      "16:17:46 [DEBUG] evaluate generation 5237: reward = 294.57, steps = 1392\n",
      "16:17:58 [DEBUG] evaluate generation 5238: reward = 298.46, steps = 1328\n",
      "16:18:11 [DEBUG] evaluate generation 5239: reward = 295.59, steps = 1361\n",
      "16:18:24 [DEBUG] evaluate generation 5240: reward = 293.88, steps = 1376\n",
      "16:18:38 [DEBUG] evaluate generation 5241: reward = 291.03, steps = 1438\n",
      "16:18:51 [DEBUG] evaluate generation 5242: reward = 292.13, steps = 1401\n",
      "16:19:05 [DEBUG] evaluate generation 5243: reward = 289.72, steps = 1489\n",
      "16:19:17 [DEBUG] evaluate generation 5244: reward = 293.58, steps = 1381\n",
      "16:19:31 [DEBUG] evaluate generation 5245: reward = 292.74, steps = 1413\n",
      "16:19:45 [DEBUG] evaluate generation 5246: reward = 293.18, steps = 1408\n",
      "16:19:58 [DEBUG] evaluate generation 5247: reward = 296.19, steps = 1341\n",
      "16:20:10 [DEBUG] evaluate generation 5248: reward = 290.66, steps = 1468\n",
      "16:20:22 [DEBUG] evaluate generation 5249: reward = 294.66, steps = 1391\n",
      "16:20:38 [DEBUG] evaluate generation 5250: reward = 288.02, steps = 1523\n",
      "16:20:53 [DEBUG] evaluate generation 5251: reward = 292.84, steps = 1426\n",
      "16:21:07 [DEBUG] evaluate generation 5252: reward = 287.64, steps = 1530\n",
      "16:21:22 [DEBUG] evaluate generation 5253: reward = 289.20, steps = 1494\n",
      "16:21:37 [DEBUG] evaluate generation 5254: reward = 291.16, steps = 1454\n",
      "16:21:49 [DEBUG] evaluate generation 5255: reward = 295.12, steps = 1385\n",
      "16:22:00 [DEBUG] evaluate generation 5256: reward = 292.42, steps = 1441\n",
      "16:22:12 [DEBUG] evaluate generation 5257: reward = 294.03, steps = 1401\n",
      "16:22:26 [DEBUG] evaluate generation 5258: reward = 289.87, steps = 1468\n",
      "16:22:40 [DEBUG] evaluate generation 5259: reward = 290.27, steps = 1457\n",
      "16:22:54 [DEBUG] evaluate generation 5260: reward = 290.69, steps = 1435\n",
      "16:23:07 [DEBUG] evaluate generation 5261: reward = 291.67, steps = 1434\n",
      "16:23:21 [DEBUG] evaluate generation 5262: reward = 294.25, steps = 1398\n",
      "16:23:35 [DEBUG] evaluate generation 5263: reward = 290.05, steps = 1483\n",
      "16:23:48 [DEBUG] evaluate generation 5264: reward = 289.48, steps = 1477\n",
      "16:24:02 [DEBUG] evaluate generation 5265: reward = 290.55, steps = 1451\n",
      "16:24:17 [DEBUG] evaluate generation 5266: reward = 288.90, steps = 1498\n",
      "16:24:31 [DEBUG] evaluate generation 5267: reward = 291.09, steps = 1470\n",
      "16:24:45 [DEBUG] evaluate generation 5268: reward = 293.38, steps = 1404\n",
      "16:25:00 [DEBUG] evaluate generation 5269: reward = 295.12, steps = 1393\n",
      "16:25:11 [DEBUG] evaluate generation 5270: reward = -54.46, steps = 231\n",
      "16:25:26 [DEBUG] evaluate generation 5271: reward = 290.72, steps = 1413\n",
      "16:25:39 [DEBUG] evaluate generation 5272: reward = 292.71, steps = 1403\n",
      "16:25:54 [DEBUG] evaluate generation 5273: reward = 292.52, steps = 1416\n",
      "16:26:06 [DEBUG] evaluate generation 5274: reward = 289.11, steps = 1473\n",
      "16:26:20 [DEBUG] evaluate generation 5275: reward = 289.46, steps = 1468\n",
      "16:26:34 [DEBUG] evaluate generation 5276: reward = 294.83, steps = 1354\n",
      "16:26:49 [DEBUG] evaluate generation 5277: reward = 291.86, steps = 1396\n",
      "16:27:03 [DEBUG] evaluate generation 5278: reward = 294.09, steps = 1371\n",
      "16:27:17 [DEBUG] evaluate generation 5279: reward = 292.68, steps = 1395\n",
      "16:27:29 [DEBUG] evaluate generation 5280: reward = 293.80, steps = 1363\n",
      "16:27:42 [DEBUG] evaluate generation 5281: reward = 292.52, steps = 1386\n",
      "16:27:55 [DEBUG] evaluate generation 5282: reward = 290.74, steps = 1439\n",
      "16:28:09 [DEBUG] evaluate generation 5283: reward = 290.46, steps = 1422\n",
      "16:28:22 [DEBUG] evaluate generation 5284: reward = 291.00, steps = 1461\n",
      "16:28:36 [DEBUG] evaluate generation 5285: reward = 293.71, steps = 1383\n",
      "16:28:52 [DEBUG] evaluate generation 5286: reward = 290.25, steps = 1424\n",
      "16:29:05 [DEBUG] evaluate generation 5287: reward = 291.17, steps = 1410\n",
      "16:29:17 [DEBUG] evaluate generation 5288: reward = 291.21, steps = 1423\n",
      "16:29:30 [DEBUG] evaluate generation 5289: reward = 292.27, steps = 1422\n",
      "16:29:45 [DEBUG] evaluate generation 5290: reward = 288.72, steps = 1450\n",
      "16:30:00 [DEBUG] evaluate generation 5291: reward = 289.35, steps = 1440\n",
      "16:30:12 [DEBUG] evaluate generation 5292: reward = 290.93, steps = 1422\n",
      "16:30:26 [DEBUG] evaluate generation 5293: reward = 293.57, steps = 1390\n",
      "16:30:41 [DEBUG] evaluate generation 5294: reward = 292.46, steps = 1409\n",
      "16:30:55 [DEBUG] evaluate generation 5295: reward = 290.63, steps = 1404\n",
      "16:31:09 [DEBUG] evaluate generation 5296: reward = 294.95, steps = 1360\n",
      "16:31:22 [DEBUG] evaluate generation 5297: reward = 293.50, steps = 1364\n",
      "16:31:35 [DEBUG] evaluate generation 5298: reward = 292.44, steps = 1385\n",
      "16:31:47 [DEBUG] evaluate generation 5299: reward = 292.75, steps = 1389\n",
      "16:31:59 [DEBUG] evaluate generation 5300: reward = 292.12, steps = 1380\n",
      "16:32:13 [DEBUG] evaluate generation 5301: reward = 292.57, steps = 1381\n",
      "16:32:27 [DEBUG] evaluate generation 5302: reward = 292.51, steps = 1402\n",
      "16:32:40 [DEBUG] evaluate generation 5303: reward = 292.33, steps = 1402\n",
      "16:32:54 [DEBUG] evaluate generation 5304: reward = 297.67, steps = 1290\n",
      "16:33:07 [DEBUG] evaluate generation 5305: reward = 294.81, steps = 1336\n",
      "16:33:20 [DEBUG] evaluate generation 5306: reward = 294.90, steps = 1357\n",
      "16:33:33 [DEBUG] evaluate generation 5307: reward = 291.93, steps = 1402\n",
      "16:33:46 [DEBUG] evaluate generation 5308: reward = 293.73, steps = 1379\n",
      "16:34:01 [DEBUG] evaluate generation 5309: reward = 294.36, steps = 1366\n",
      "16:34:13 [DEBUG] evaluate generation 5310: reward = 290.35, steps = 1419\n",
      "16:34:27 [DEBUG] evaluate generation 5311: reward = 291.15, steps = 1406\n",
      "16:34:41 [DEBUG] evaluate generation 5312: reward = 293.51, steps = 1352\n",
      "16:34:55 [DEBUG] evaluate generation 5313: reward = 292.21, steps = 1384\n",
      "16:35:09 [DEBUG] evaluate generation 5314: reward = 293.20, steps = 1364\n",
      "16:35:21 [DEBUG] evaluate generation 5315: reward = 291.16, steps = 1400\n",
      "16:35:35 [DEBUG] evaluate generation 5316: reward = 290.55, steps = 1399\n",
      "16:35:47 [DEBUG] evaluate generation 5317: reward = 291.67, steps = 1405\n",
      "16:36:00 [DEBUG] evaluate generation 5318: reward = 295.58, steps = 1334\n",
      "16:36:14 [DEBUG] evaluate generation 5319: reward = 295.94, steps = 1346\n",
      "16:36:27 [DEBUG] evaluate generation 5320: reward = 294.08, steps = 1341\n",
      "16:36:42 [DEBUG] evaluate generation 5321: reward = 295.26, steps = 1341\n",
      "16:36:54 [DEBUG] evaluate generation 5322: reward = 292.40, steps = 1408\n",
      "16:37:07 [DEBUG] evaluate generation 5323: reward = 290.69, steps = 1434\n",
      "16:37:17 [DEBUG] evaluate generation 5324: reward = 291.73, steps = 1433\n",
      "16:37:32 [DEBUG] evaluate generation 5325: reward = 292.68, steps = 1380\n",
      "16:37:47 [DEBUG] evaluate generation 5326: reward = 292.28, steps = 1410\n",
      "16:38:00 [DEBUG] evaluate generation 5327: reward = 290.43, steps = 1441\n",
      "16:38:14 [DEBUG] evaluate generation 5328: reward = 291.84, steps = 1406\n",
      "16:38:28 [DEBUG] evaluate generation 5329: reward = 292.45, steps = 1392\n",
      "16:38:42 [DEBUG] evaluate generation 5330: reward = 295.09, steps = 1331\n",
      "16:38:55 [DEBUG] evaluate generation 5331: reward = 290.97, steps = 1429\n",
      "16:39:09 [DEBUG] evaluate generation 5332: reward = 292.03, steps = 1402\n",
      "16:39:23 [DEBUG] evaluate generation 5333: reward = 291.67, steps = 1428\n",
      "16:39:35 [DEBUG] evaluate generation 5334: reward = 293.06, steps = 1376\n",
      "16:39:50 [DEBUG] evaluate generation 5335: reward = 290.04, steps = 1414\n",
      "16:40:05 [DEBUG] evaluate generation 5336: reward = 292.69, steps = 1401\n",
      "16:40:20 [DEBUG] evaluate generation 5337: reward = 294.78, steps = 1350\n",
      "16:40:32 [DEBUG] evaluate generation 5338: reward = 295.08, steps = 1344\n",
      "16:40:45 [DEBUG] evaluate generation 5339: reward = 293.60, steps = 1372\n",
      "16:40:59 [DEBUG] evaluate generation 5340: reward = 290.47, steps = 1421\n",
      "16:41:13 [DEBUG] evaluate generation 5341: reward = 291.17, steps = 1406\n",
      "16:41:28 [DEBUG] evaluate generation 5342: reward = 289.82, steps = 1467\n",
      "16:41:42 [DEBUG] evaluate generation 5343: reward = 292.90, steps = 1392\n",
      "16:41:57 [DEBUG] evaluate generation 5344: reward = 289.27, steps = 1459\n",
      "16:42:11 [DEBUG] evaluate generation 5345: reward = 294.39, steps = 1355\n",
      "16:42:26 [DEBUG] evaluate generation 5346: reward = 292.09, steps = 1429\n",
      "16:42:40 [DEBUG] evaluate generation 5347: reward = 293.92, steps = 1362\n",
      "16:42:55 [DEBUG] evaluate generation 5348: reward = 294.70, steps = 1380\n",
      "16:43:08 [DEBUG] evaluate generation 5349: reward = 293.09, steps = 1381\n",
      "16:43:22 [DEBUG] evaluate generation 5350: reward = 294.52, steps = 1380\n",
      "16:43:36 [DEBUG] evaluate generation 5351: reward = 292.22, steps = 1403\n",
      "16:43:49 [DEBUG] evaluate generation 5352: reward = 291.65, steps = 1438\n",
      "16:44:03 [DEBUG] evaluate generation 5353: reward = 288.78, steps = 1485\n",
      "16:44:17 [DEBUG] evaluate generation 5354: reward = 289.36, steps = 1471\n",
      "16:44:31 [DEBUG] evaluate generation 5355: reward = 289.94, steps = 1463\n",
      "16:44:46 [DEBUG] evaluate generation 5356: reward = 292.64, steps = 1418\n",
      "16:45:00 [DEBUG] evaluate generation 5357: reward = 292.91, steps = 1428\n",
      "16:45:14 [DEBUG] evaluate generation 5358: reward = 288.78, steps = 1503\n",
      "16:45:28 [DEBUG] evaluate generation 5359: reward = 289.51, steps = 1475\n",
      "16:45:40 [DEBUG] evaluate generation 5360: reward = 295.08, steps = 1365\n",
      "16:45:53 [DEBUG] evaluate generation 5361: reward = 289.53, steps = 1462\n",
      "16:46:07 [DEBUG] evaluate generation 5362: reward = 291.40, steps = 1428\n",
      "16:46:20 [DEBUG] evaluate generation 5363: reward = 291.57, steps = 1393\n",
      "16:46:34 [DEBUG] evaluate generation 5364: reward = 292.80, steps = 1429\n",
      "16:46:48 [DEBUG] evaluate generation 5365: reward = 291.38, steps = 1422\n",
      "16:47:01 [DEBUG] evaluate generation 5366: reward = 296.83, steps = 1329\n",
      "16:47:16 [DEBUG] evaluate generation 5367: reward = 290.70, steps = 1435\n",
      "16:47:31 [DEBUG] evaluate generation 5368: reward = 292.05, steps = 1408\n",
      "16:47:45 [DEBUG] evaluate generation 5369: reward = 292.45, steps = 1400\n",
      "16:47:59 [DEBUG] evaluate generation 5370: reward = 292.55, steps = 1405\n",
      "16:48:10 [DEBUG] evaluate generation 5371: reward = 291.37, steps = 1424\n",
      "16:48:24 [DEBUG] evaluate generation 5372: reward = 294.29, steps = 1347\n",
      "16:48:39 [DEBUG] evaluate generation 5373: reward = 290.42, steps = 1473\n",
      "16:48:53 [DEBUG] evaluate generation 5374: reward = 291.82, steps = 1446\n",
      "16:49:07 [DEBUG] evaluate generation 5375: reward = 290.66, steps = 1425\n",
      "16:49:22 [DEBUG] evaluate generation 5376: reward = 288.35, steps = 1498\n",
      "16:49:36 [DEBUG] evaluate generation 5377: reward = 291.35, steps = 1464\n",
      "16:49:51 [DEBUG] evaluate generation 5378: reward = 292.41, steps = 1415\n",
      "16:50:06 [DEBUG] evaluate generation 5379: reward = 291.77, steps = 1425\n",
      "16:50:20 [DEBUG] evaluate generation 5380: reward = 292.99, steps = 1370\n",
      "16:50:34 [DEBUG] evaluate generation 5381: reward = 294.59, steps = 1357\n",
      "16:50:46 [DEBUG] evaluate generation 5382: reward = 290.45, steps = 1436\n",
      "16:50:59 [DEBUG] evaluate generation 5383: reward = 293.14, steps = 1404\n",
      "16:51:12 [DEBUG] evaluate generation 5384: reward = 291.46, steps = 1419\n",
      "16:51:26 [DEBUG] evaluate generation 5385: reward = 120.53, steps = 990\n",
      "16:51:39 [DEBUG] evaluate generation 5386: reward = 293.12, steps = 1372\n",
      "16:51:52 [DEBUG] evaluate generation 5387: reward = 291.49, steps = 1395\n",
      "16:52:05 [DEBUG] evaluate generation 5388: reward = 291.63, steps = 1389\n",
      "16:52:20 [DEBUG] evaluate generation 5389: reward = 293.96, steps = 1342\n",
      "16:52:33 [DEBUG] evaluate generation 5390: reward = 293.50, steps = 1379\n",
      "16:52:48 [DEBUG] evaluate generation 5391: reward = 292.10, steps = 1412\n",
      "16:53:01 [DEBUG] evaluate generation 5392: reward = 291.75, steps = 1400\n",
      "16:53:14 [DEBUG] evaluate generation 5393: reward = 291.33, steps = 1414\n",
      "16:53:27 [DEBUG] evaluate generation 5394: reward = 290.60, steps = 1453\n",
      "16:53:41 [DEBUG] evaluate generation 5395: reward = 82.48, steps = 921\n",
      "16:53:55 [DEBUG] evaluate generation 5396: reward = 290.68, steps = 1419\n",
      "16:54:09 [DEBUG] evaluate generation 5397: reward = 290.53, steps = 1440\n",
      "16:54:22 [DEBUG] evaluate generation 5398: reward = 291.07, steps = 1436\n",
      "16:54:37 [DEBUG] evaluate generation 5399: reward = 291.75, steps = 1394\n",
      "16:54:51 [DEBUG] evaluate generation 5400: reward = 292.05, steps = 1393\n",
      "16:55:06 [DEBUG] evaluate generation 5401: reward = 286.34, steps = 1495\n",
      "16:55:20 [DEBUG] evaluate generation 5402: reward = 287.63, steps = 1499\n",
      "16:55:32 [DEBUG] evaluate generation 5403: reward = 290.17, steps = 1439\n",
      "16:55:47 [DEBUG] evaluate generation 5404: reward = 290.34, steps = 1425\n",
      "16:56:01 [DEBUG] evaluate generation 5405: reward = 291.07, steps = 1421\n",
      "16:56:16 [DEBUG] evaluate generation 5406: reward = 292.17, steps = 1400\n",
      "16:56:31 [DEBUG] evaluate generation 5407: reward = 290.77, steps = 1434\n",
      "16:56:44 [DEBUG] evaluate generation 5408: reward = 290.36, steps = 1436\n",
      "16:56:58 [DEBUG] evaluate generation 5409: reward = 292.79, steps = 1368\n",
      "16:57:12 [DEBUG] evaluate generation 5410: reward = 292.90, steps = 1371\n",
      "16:57:26 [DEBUG] evaluate generation 5411: reward = 293.25, steps = 1358\n",
      "16:57:40 [DEBUG] evaluate generation 5412: reward = 294.30, steps = 1374\n",
      "16:57:54 [DEBUG] evaluate generation 5413: reward = 292.05, steps = 1382\n",
      "16:58:08 [DEBUG] evaluate generation 5414: reward = 292.62, steps = 1404\n",
      "16:58:20 [DEBUG] evaluate generation 5415: reward = 292.23, steps = 1406\n",
      "16:58:34 [DEBUG] evaluate generation 5416: reward = 294.34, steps = 1368\n",
      "16:58:47 [DEBUG] evaluate generation 5417: reward = 292.67, steps = 1372\n",
      "16:59:00 [DEBUG] evaluate generation 5418: reward = 292.17, steps = 1393\n",
      "16:59:13 [DEBUG] evaluate generation 5419: reward = 294.44, steps = 1368\n",
      "16:59:27 [DEBUG] evaluate generation 5420: reward = 289.88, steps = 1457\n",
      "16:59:41 [DEBUG] evaluate generation 5421: reward = 292.57, steps = 1383\n",
      "16:59:56 [DEBUG] evaluate generation 5422: reward = 293.49, steps = 1411\n",
      "17:00:10 [DEBUG] evaluate generation 5423: reward = 293.19, steps = 1373\n",
      "17:00:24 [DEBUG] evaluate generation 5424: reward = 290.70, steps = 1412\n",
      "17:00:38 [DEBUG] evaluate generation 5425: reward = 290.36, steps = 1458\n",
      "17:00:51 [DEBUG] evaluate generation 5426: reward = 292.06, steps = 1403\n",
      "17:01:04 [DEBUG] evaluate generation 5427: reward = 293.51, steps = 1383\n",
      "17:01:18 [DEBUG] evaluate generation 5428: reward = 294.00, steps = 1367\n",
      "17:01:32 [DEBUG] evaluate generation 5429: reward = 294.92, steps = 1353\n",
      "17:01:46 [DEBUG] evaluate generation 5430: reward = 290.58, steps = 1477\n",
      "17:01:57 [DEBUG] evaluate generation 5431: reward = 294.21, steps = 1383\n",
      "17:02:10 [DEBUG] evaluate generation 5432: reward = 293.55, steps = 1400\n",
      "17:02:23 [DEBUG] evaluate generation 5433: reward = 295.78, steps = 1350\n",
      "17:02:36 [DEBUG] evaluate generation 5434: reward = 288.96, steps = 1491\n",
      "17:02:49 [DEBUG] evaluate generation 5435: reward = 292.18, steps = 1419\n",
      "17:03:03 [DEBUG] evaluate generation 5436: reward = 292.33, steps = 1405\n",
      "17:03:17 [DEBUG] evaluate generation 5437: reward = 294.11, steps = 1395\n",
      "17:03:30 [DEBUG] evaluate generation 5438: reward = 294.08, steps = 1376\n",
      "17:03:45 [DEBUG] evaluate generation 5439: reward = 292.55, steps = 1392\n",
      "17:03:58 [DEBUG] evaluate generation 5440: reward = 292.68, steps = 1429\n",
      "17:04:12 [DEBUG] evaluate generation 5441: reward = 291.64, steps = 1436\n",
      "17:04:27 [DEBUG] evaluate generation 5442: reward = 290.78, steps = 1432\n",
      "17:04:41 [DEBUG] evaluate generation 5443: reward = 289.41, steps = 1449\n",
      "17:04:54 [DEBUG] evaluate generation 5444: reward = 290.65, steps = 1403\n",
      "17:05:08 [DEBUG] evaluate generation 5445: reward = 293.19, steps = 1372\n",
      "17:05:23 [DEBUG] evaluate generation 5446: reward = 292.52, steps = 1401\n",
      "17:05:36 [DEBUG] evaluate generation 5447: reward = 291.79, steps = 1419\n",
      "17:05:50 [DEBUG] evaluate generation 5448: reward = 294.22, steps = 1353\n",
      "17:06:05 [DEBUG] evaluate generation 5449: reward = 293.53, steps = 1383\n",
      "17:06:18 [DEBUG] evaluate generation 5450: reward = 293.93, steps = 1372\n",
      "17:06:32 [DEBUG] evaluate generation 5451: reward = 294.09, steps = 1369\n",
      "17:06:45 [DEBUG] evaluate generation 5452: reward = 293.22, steps = 1376\n",
      "17:06:58 [DEBUG] evaluate generation 5453: reward = 291.46, steps = 1407\n",
      "17:07:10 [DEBUG] evaluate generation 5454: reward = 293.91, steps = 1357\n",
      "17:07:23 [DEBUG] evaluate generation 5455: reward = 294.62, steps = 1327\n",
      "17:07:36 [DEBUG] evaluate generation 5456: reward = 288.58, steps = 1457\n",
      "17:07:51 [DEBUG] evaluate generation 5457: reward = 293.13, steps = 1378\n",
      "17:08:05 [DEBUG] evaluate generation 5458: reward = 291.90, steps = 1396\n",
      "17:08:18 [DEBUG] evaluate generation 5459: reward = 290.93, steps = 1399\n",
      "17:08:32 [DEBUG] evaluate generation 5460: reward = 291.55, steps = 1404\n",
      "17:08:46 [DEBUG] evaluate generation 5461: reward = 295.11, steps = 1309\n",
      "17:09:00 [DEBUG] evaluate generation 5462: reward = 293.13, steps = 1363\n",
      "17:09:12 [DEBUG] evaluate generation 5463: reward = 293.49, steps = 1377\n",
      "17:09:24 [DEBUG] evaluate generation 5464: reward = 296.16, steps = 1318\n",
      "17:09:37 [DEBUG] evaluate generation 5465: reward = 290.09, steps = 1413\n",
      "17:09:49 [DEBUG] evaluate generation 5466: reward = 295.15, steps = 1355\n",
      "17:10:03 [DEBUG] evaluate generation 5467: reward = 287.16, steps = 1466\n",
      "17:10:17 [DEBUG] evaluate generation 5468: reward = 286.47, steps = 1502\n",
      "17:10:31 [DEBUG] evaluate generation 5469: reward = 291.73, steps = 1420\n",
      "17:10:46 [DEBUG] evaluate generation 5470: reward = 289.45, steps = 1480\n",
      "17:10:57 [DEBUG] evaluate generation 5471: reward = 296.81, steps = 1344\n",
      "17:11:09 [DEBUG] evaluate generation 5472: reward = 292.61, steps = 1393\n",
      "17:11:23 [DEBUG] evaluate generation 5473: reward = 294.16, steps = 1334\n",
      "17:11:38 [DEBUG] evaluate generation 5474: reward = 291.55, steps = 1421\n",
      "17:11:52 [DEBUG] evaluate generation 5475: reward = 295.87, steps = 1311\n",
      "17:12:06 [DEBUG] evaluate generation 5476: reward = 292.32, steps = 1376\n",
      "17:12:19 [DEBUG] evaluate generation 5477: reward = 293.93, steps = 1347\n",
      "17:12:30 [DEBUG] evaluate generation 5478: reward = 291.70, steps = 1419\n",
      "17:12:43 [DEBUG] evaluate generation 5479: reward = 289.70, steps = 1447\n",
      "17:12:56 [DEBUG] evaluate generation 5480: reward = 293.51, steps = 1406\n",
      "17:13:10 [DEBUG] evaluate generation 5481: reward = 289.70, steps = 1457\n",
      "17:13:24 [DEBUG] evaluate generation 5482: reward = 293.81, steps = 1368\n",
      "17:13:38 [DEBUG] evaluate generation 5483: reward = 294.24, steps = 1361\n",
      "17:13:52 [DEBUG] evaluate generation 5484: reward = 292.27, steps = 1391\n",
      "17:14:06 [DEBUG] evaluate generation 5485: reward = 293.10, steps = 1365\n",
      "17:14:18 [DEBUG] evaluate generation 5486: reward = 290.97, steps = 1404\n",
      "17:14:30 [DEBUG] evaluate generation 5487: reward = 289.02, steps = 1442\n",
      "17:14:42 [DEBUG] evaluate generation 5488: reward = 293.28, steps = 1372\n",
      "17:14:56 [DEBUG] evaluate generation 5489: reward = 291.95, steps = 1399\n",
      "17:15:09 [DEBUG] evaluate generation 5490: reward = 289.69, steps = 1437\n",
      "17:15:23 [DEBUG] evaluate generation 5491: reward = 291.35, steps = 1361\n",
      "17:15:35 [DEBUG] evaluate generation 5492: reward = 293.98, steps = 1358\n",
      "17:15:48 [DEBUG] evaluate generation 5493: reward = 292.81, steps = 1376\n",
      "17:15:59 [DEBUG] evaluate generation 5494: reward = 294.21, steps = 1372\n",
      "17:16:13 [DEBUG] evaluate generation 5495: reward = 293.80, steps = 1381\n",
      "17:16:26 [DEBUG] evaluate generation 5496: reward = 291.02, steps = 1427\n",
      "17:16:40 [DEBUG] evaluate generation 5497: reward = 292.44, steps = 1386\n",
      "17:16:54 [DEBUG] evaluate generation 5498: reward = 290.77, steps = 1402\n",
      "17:17:08 [DEBUG] evaluate generation 5499: reward = 293.52, steps = 1369\n",
      "17:17:23 [DEBUG] evaluate generation 5500: reward = 291.08, steps = 1443\n",
      "17:17:36 [DEBUG] evaluate generation 5501: reward = 293.19, steps = 1402\n",
      "17:17:49 [DEBUG] evaluate generation 5502: reward = 290.52, steps = 1452\n",
      "17:18:02 [DEBUG] evaluate generation 5503: reward = 290.73, steps = 1442\n",
      "17:18:16 [DEBUG] evaluate generation 5504: reward = 291.65, steps = 1423\n",
      "17:18:28 [DEBUG] evaluate generation 5505: reward = 294.55, steps = 1373\n",
      "17:18:42 [DEBUG] evaluate generation 5506: reward = 288.99, steps = 1487\n",
      "17:18:56 [DEBUG] evaluate generation 5507: reward = 290.32, steps = 1458\n",
      "17:19:10 [DEBUG] evaluate generation 5508: reward = 294.18, steps = 1373\n",
      "17:19:22 [DEBUG] evaluate generation 5509: reward = 291.74, steps = 1419\n",
      "17:19:36 [DEBUG] evaluate generation 5510: reward = 291.24, steps = 1424\n",
      "17:19:50 [DEBUG] evaluate generation 5511: reward = 294.67, steps = 1342\n",
      "17:20:04 [DEBUG] evaluate generation 5512: reward = 296.54, steps = 1334\n",
      "17:20:17 [DEBUG] evaluate generation 5513: reward = 292.50, steps = 1401\n",
      "17:20:32 [DEBUG] evaluate generation 5514: reward = 293.51, steps = 1371\n",
      "17:20:47 [DEBUG] evaluate generation 5515: reward = 291.54, steps = 1401\n",
      "17:21:00 [DEBUG] evaluate generation 5516: reward = 294.07, steps = 1374\n",
      "17:21:13 [DEBUG] evaluate generation 5517: reward = 292.41, steps = 1404\n",
      "17:21:25 [DEBUG] evaluate generation 5518: reward = 291.44, steps = 1395\n",
      "17:21:40 [DEBUG] evaluate generation 5519: reward = 295.17, steps = 1363\n",
      "17:21:54 [DEBUG] evaluate generation 5520: reward = 296.88, steps = 1325\n",
      "17:22:08 [DEBUG] evaluate generation 5521: reward = 294.20, steps = 1362\n",
      "17:22:21 [DEBUG] evaluate generation 5522: reward = 295.84, steps = 1337\n",
      "17:22:35 [DEBUG] evaluate generation 5523: reward = 296.24, steps = 1309\n",
      "17:22:47 [DEBUG] evaluate generation 5524: reward = 293.45, steps = 1379\n",
      "17:23:01 [DEBUG] evaluate generation 5525: reward = 294.66, steps = 1344\n",
      "17:23:15 [DEBUG] evaluate generation 5526: reward = 291.26, steps = 1422\n",
      "17:23:28 [DEBUG] evaluate generation 5527: reward = 292.40, steps = 1383\n",
      "17:23:41 [DEBUG] evaluate generation 5528: reward = 293.57, steps = 1373\n",
      "17:23:53 [DEBUG] evaluate generation 5529: reward = 293.81, steps = 1344\n",
      "17:24:08 [DEBUG] evaluate generation 5530: reward = 291.26, steps = 1418\n",
      "17:24:19 [DEBUG] evaluate generation 5531: reward = 294.22, steps = 1341\n",
      "17:24:31 [DEBUG] evaluate generation 5532: reward = 297.35, steps = 1284\n",
      "17:24:44 [DEBUG] evaluate generation 5533: reward = 296.39, steps = 1329\n",
      "17:24:57 [DEBUG] evaluate generation 5534: reward = 294.76, steps = 1338\n",
      "17:25:11 [DEBUG] evaluate generation 5535: reward = 295.52, steps = 1336\n",
      "17:25:24 [DEBUG] evaluate generation 5536: reward = 294.88, steps = 1391\n",
      "17:25:38 [DEBUG] evaluate generation 5537: reward = 293.65, steps = 1384\n",
      "17:25:50 [DEBUG] evaluate generation 5538: reward = 287.17, steps = 1488\n",
      "17:26:03 [DEBUG] evaluate generation 5539: reward = 293.23, steps = 1356\n",
      "17:26:17 [DEBUG] evaluate generation 5540: reward = 292.83, steps = 1389\n",
      "17:26:30 [DEBUG] evaluate generation 5541: reward = 293.86, steps = 1340\n",
      "17:26:44 [DEBUG] evaluate generation 5542: reward = 295.95, steps = 1311\n",
      "17:26:56 [DEBUG] evaluate generation 5543: reward = 297.10, steps = 1310\n",
      "17:27:07 [DEBUG] evaluate generation 5544: reward = 293.37, steps = 1363\n",
      "17:27:21 [DEBUG] evaluate generation 5545: reward = 292.30, steps = 1384\n",
      "17:27:34 [DEBUG] evaluate generation 5546: reward = 294.34, steps = 1350\n",
      "17:27:47 [DEBUG] evaluate generation 5547: reward = 293.72, steps = 1373\n",
      "17:28:00 [DEBUG] evaluate generation 5548: reward = 292.76, steps = 1354\n",
      "17:28:13 [DEBUG] evaluate generation 5549: reward = 291.91, steps = 1393\n",
      "17:28:26 [DEBUG] evaluate generation 5550: reward = 292.09, steps = 1389\n",
      "17:28:41 [DEBUG] evaluate generation 5551: reward = 289.13, steps = 1444\n",
      "17:28:53 [DEBUG] evaluate generation 5552: reward = 293.47, steps = 1388\n",
      "17:29:07 [DEBUG] evaluate generation 5553: reward = 291.07, steps = 1398\n",
      "17:29:21 [DEBUG] evaluate generation 5554: reward = 296.30, steps = 1324\n",
      "17:29:34 [DEBUG] evaluate generation 5555: reward = 292.96, steps = 1347\n",
      "17:29:47 [DEBUG] evaluate generation 5556: reward = 287.86, steps = 1487\n",
      "17:30:01 [DEBUG] evaluate generation 5557: reward = 290.42, steps = 1440\n",
      "17:30:15 [DEBUG] evaluate generation 5558: reward = 293.60, steps = 1374\n",
      "17:30:28 [DEBUG] evaluate generation 5559: reward = 294.35, steps = 1375\n",
      "17:30:42 [DEBUG] evaluate generation 5560: reward = 292.39, steps = 1391\n",
      "17:30:54 [DEBUG] evaluate generation 5561: reward = 292.78, steps = 1394\n",
      "17:31:07 [DEBUG] evaluate generation 5562: reward = 292.97, steps = 1347\n",
      "17:31:21 [DEBUG] evaluate generation 5563: reward = 288.14, steps = 1453\n",
      "17:31:33 [DEBUG] evaluate generation 5564: reward = 292.13, steps = 1391\n",
      "17:31:47 [DEBUG] evaluate generation 5565: reward = 291.52, steps = 1405\n",
      "17:32:01 [DEBUG] evaluate generation 5566: reward = 289.60, steps = 1452\n",
      "17:32:15 [DEBUG] evaluate generation 5567: reward = 295.41, steps = 1333\n",
      "17:32:30 [DEBUG] evaluate generation 5568: reward = 291.75, steps = 1385\n",
      "17:32:44 [DEBUG] evaluate generation 5569: reward = 291.66, steps = 1404\n",
      "17:32:58 [DEBUG] evaluate generation 5570: reward = 293.35, steps = 1386\n",
      "17:33:12 [DEBUG] evaluate generation 5571: reward = 295.71, steps = 1333\n",
      "17:33:26 [DEBUG] evaluate generation 5572: reward = 292.95, steps = 1370\n",
      "17:33:39 [DEBUG] evaluate generation 5573: reward = 294.78, steps = 1317\n",
      "17:33:50 [DEBUG] evaluate generation 5574: reward = 297.22, steps = 1329\n",
      "17:34:03 [DEBUG] evaluate generation 5575: reward = 293.29, steps = 1341\n",
      "17:34:16 [DEBUG] evaluate generation 5576: reward = 292.20, steps = 1403\n",
      "17:34:28 [DEBUG] evaluate generation 5577: reward = 295.15, steps = 1342\n",
      "17:34:41 [DEBUG] evaluate generation 5578: reward = 291.43, steps = 1424\n",
      "17:34:54 [DEBUG] evaluate generation 5579: reward = 295.05, steps = 1354\n",
      "17:35:09 [DEBUG] evaluate generation 5580: reward = 292.61, steps = 1372\n",
      "17:35:24 [DEBUG] evaluate generation 5581: reward = 291.16, steps = 1398\n",
      "17:35:37 [DEBUG] evaluate generation 5582: reward = 293.00, steps = 1385\n",
      "17:35:50 [DEBUG] evaluate generation 5583: reward = 295.27, steps = 1347\n",
      "17:36:03 [DEBUG] evaluate generation 5584: reward = 294.67, steps = 1361\n",
      "17:36:17 [DEBUG] evaluate generation 5585: reward = 294.63, steps = 1346\n",
      "17:36:31 [DEBUG] evaluate generation 5586: reward = 293.24, steps = 1361\n",
      "17:36:44 [DEBUG] evaluate generation 5587: reward = 292.48, steps = 1407\n",
      "17:36:58 [DEBUG] evaluate generation 5588: reward = 291.81, steps = 1418\n",
      "17:37:09 [DEBUG] evaluate generation 5589: reward = 293.54, steps = 1385\n",
      "17:37:24 [DEBUG] evaluate generation 5590: reward = 294.13, steps = 1389\n",
      "17:37:38 [DEBUG] evaluate generation 5591: reward = 288.43, steps = 1483\n",
      "17:37:51 [DEBUG] evaluate generation 5592: reward = 290.96, steps = 1398\n",
      "17:38:04 [DEBUG] evaluate generation 5593: reward = 290.37, steps = 1450\n",
      "17:38:18 [DEBUG] evaluate generation 5594: reward = 290.28, steps = 1427\n",
      "17:38:32 [DEBUG] evaluate generation 5595: reward = 289.02, steps = 1458\n",
      "17:38:46 [DEBUG] evaluate generation 5596: reward = 289.97, steps = 1451\n",
      "17:39:01 [DEBUG] evaluate generation 5597: reward = 294.68, steps = 1348\n",
      "17:39:15 [DEBUG] evaluate generation 5598: reward = 291.21, steps = 1418\n",
      "17:39:29 [DEBUG] evaluate generation 5599: reward = 290.60, steps = 1428\n",
      "17:39:43 [DEBUG] evaluate generation 5600: reward = 290.05, steps = 1455\n",
      "17:39:57 [DEBUG] evaluate generation 5601: reward = 294.16, steps = 1355\n",
      "17:40:12 [DEBUG] evaluate generation 5602: reward = 291.44, steps = 1397\n",
      "17:40:26 [DEBUG] evaluate generation 5603: reward = 287.92, steps = 1481\n",
      "17:40:38 [DEBUG] evaluate generation 5604: reward = 290.98, steps = 1384\n",
      "17:40:52 [DEBUG] evaluate generation 5605: reward = 294.51, steps = 1350\n",
      "17:41:05 [DEBUG] evaluate generation 5606: reward = 296.36, steps = 1301\n",
      "17:41:18 [DEBUG] evaluate generation 5607: reward = 293.65, steps = 1349\n",
      "17:41:32 [DEBUG] evaluate generation 5608: reward = 293.07, steps = 1388\n",
      "17:41:45 [DEBUG] evaluate generation 5609: reward = 293.86, steps = 1367\n",
      "17:41:59 [DEBUG] evaluate generation 5610: reward = 293.23, steps = 1389\n",
      "17:42:13 [DEBUG] evaluate generation 5611: reward = 294.36, steps = 1347\n",
      "17:42:26 [DEBUG] evaluate generation 5612: reward = 294.60, steps = 1368\n",
      "17:42:40 [DEBUG] evaluate generation 5613: reward = 291.43, steps = 1430\n",
      "17:42:53 [DEBUG] evaluate generation 5614: reward = 296.49, steps = 1340\n",
      "17:43:07 [DEBUG] evaluate generation 5615: reward = 293.43, steps = 1393\n",
      "17:43:21 [DEBUG] evaluate generation 5616: reward = 296.19, steps = 1334\n",
      "17:43:35 [DEBUG] evaluate generation 5617: reward = 294.66, steps = 1344\n",
      "17:43:49 [DEBUG] evaluate generation 5618: reward = 295.64, steps = 1321\n",
      "17:44:02 [DEBUG] evaluate generation 5619: reward = 293.33, steps = 1402\n",
      "17:44:16 [DEBUG] evaluate generation 5620: reward = 292.00, steps = 1397\n",
      "17:44:30 [DEBUG] evaluate generation 5621: reward = 293.62, steps = 1387\n",
      "17:44:42 [DEBUG] evaluate generation 5622: reward = 294.62, steps = 1357\n",
      "17:44:56 [DEBUG] evaluate generation 5623: reward = 298.01, steps = 1310\n",
      "17:45:09 [DEBUG] evaluate generation 5624: reward = 295.70, steps = 1384\n",
      "17:45:23 [DEBUG] evaluate generation 5625: reward = 295.90, steps = 1351\n",
      "17:45:37 [DEBUG] evaluate generation 5626: reward = 292.48, steps = 1428\n",
      "17:45:51 [DEBUG] evaluate generation 5627: reward = 294.42, steps = 1355\n",
      "17:46:02 [DEBUG] evaluate generation 5628: reward = 294.89, steps = 1377\n",
      "17:46:16 [DEBUG] evaluate generation 5629: reward = 291.94, steps = 1400\n",
      "17:46:29 [DEBUG] evaluate generation 5630: reward = 288.58, steps = 1459\n",
      "17:46:42 [DEBUG] evaluate generation 5631: reward = 291.46, steps = 1414\n",
      "17:46:55 [DEBUG] evaluate generation 5632: reward = 296.05, steps = 1360\n",
      "17:47:07 [DEBUG] evaluate generation 5633: reward = 292.94, steps = 1397\n",
      "17:47:21 [DEBUG] evaluate generation 5634: reward = 290.37, steps = 1447\n",
      "17:47:34 [DEBUG] evaluate generation 5635: reward = 294.43, steps = 1404\n",
      "17:47:48 [DEBUG] evaluate generation 5636: reward = 293.46, steps = 1410\n",
      "17:48:02 [DEBUG] evaluate generation 5637: reward = 292.08, steps = 1426\n",
      "17:48:16 [DEBUG] evaluate generation 5638: reward = 293.32, steps = 1393\n",
      "17:48:30 [DEBUG] evaluate generation 5639: reward = 291.71, steps = 1439\n",
      "17:48:45 [DEBUG] evaluate generation 5640: reward = 296.02, steps = 1353\n",
      "17:48:58 [DEBUG] evaluate generation 5641: reward = 292.66, steps = 1421\n",
      "17:49:13 [DEBUG] evaluate generation 5642: reward = 296.54, steps = 1335\n",
      "17:49:27 [DEBUG] evaluate generation 5643: reward = 293.34, steps = 1396\n",
      "17:49:40 [DEBUG] evaluate generation 5644: reward = 294.74, steps = 1383\n",
      "17:49:53 [DEBUG] evaluate generation 5645: reward = 291.18, steps = 1445\n",
      "17:50:06 [DEBUG] evaluate generation 5646: reward = 295.46, steps = 1372\n",
      "17:50:20 [DEBUG] evaluate generation 5647: reward = 293.81, steps = 1379\n",
      "17:50:34 [DEBUG] evaluate generation 5648: reward = 288.10, steps = 1521\n",
      "17:50:48 [DEBUG] evaluate generation 5649: reward = 294.08, steps = 1392\n",
      "17:51:01 [DEBUG] evaluate generation 5650: reward = 296.35, steps = 1356\n",
      "17:51:16 [DEBUG] evaluate generation 5651: reward = 295.39, steps = 1379\n",
      "17:51:30 [DEBUG] evaluate generation 5652: reward = 294.31, steps = 1396\n",
      "17:51:44 [DEBUG] evaluate generation 5653: reward = 291.71, steps = 1474\n",
      "17:51:57 [DEBUG] evaluate generation 5654: reward = 294.84, steps = 1396\n",
      "17:52:12 [DEBUG] evaluate generation 5655: reward = 294.43, steps = 1376\n",
      "17:52:27 [DEBUG] evaluate generation 5656: reward = 293.52, steps = 1431\n",
      "17:52:39 [DEBUG] evaluate generation 5657: reward = 297.62, steps = 1327\n",
      "17:52:53 [DEBUG] evaluate generation 5658: reward = 294.95, steps = 1347\n",
      "17:53:05 [DEBUG] evaluate generation 5659: reward = 294.95, steps = 1365\n",
      "17:53:20 [DEBUG] evaluate generation 5660: reward = 292.65, steps = 1408\n",
      "17:53:34 [DEBUG] evaluate generation 5661: reward = 294.56, steps = 1397\n",
      "17:53:48 [DEBUG] evaluate generation 5662: reward = 291.91, steps = 1423\n",
      "17:54:02 [DEBUG] evaluate generation 5663: reward = 295.24, steps = 1389\n",
      "17:54:15 [DEBUG] evaluate generation 5664: reward = 294.93, steps = 1351\n",
      "17:54:30 [DEBUG] evaluate generation 5665: reward = 293.79, steps = 1380\n",
      "17:54:43 [DEBUG] evaluate generation 5666: reward = 111.12, steps = 971\n",
      "17:54:55 [DEBUG] evaluate generation 5667: reward = 294.24, steps = 1361\n",
      "17:55:08 [DEBUG] evaluate generation 5668: reward = 295.74, steps = 1349\n",
      "17:55:21 [DEBUG] evaluate generation 5669: reward = 293.54, steps = 1385\n",
      "17:55:34 [DEBUG] evaluate generation 5670: reward = 296.00, steps = 1331\n",
      "17:55:47 [DEBUG] evaluate generation 5671: reward = 298.39, steps = 1314\n",
      "17:56:02 [DEBUG] evaluate generation 5672: reward = 295.84, steps = 1340\n",
      "17:56:16 [DEBUG] evaluate generation 5673: reward = 297.14, steps = 1330\n",
      "17:56:30 [DEBUG] evaluate generation 5674: reward = 293.34, steps = 1384\n",
      "17:56:44 [DEBUG] evaluate generation 5675: reward = 293.59, steps = 1396\n",
      "17:56:58 [DEBUG] evaluate generation 5676: reward = 291.59, steps = 1427\n",
      "17:57:12 [DEBUG] evaluate generation 5677: reward = 295.22, steps = 1352\n",
      "17:57:26 [DEBUG] evaluate generation 5678: reward = 296.09, steps = 1353\n",
      "17:57:39 [DEBUG] evaluate generation 5679: reward = 290.85, steps = 1407\n",
      "17:57:54 [DEBUG] evaluate generation 5680: reward = 293.47, steps = 1407\n",
      "17:58:08 [DEBUG] evaluate generation 5681: reward = 295.96, steps = 1353\n",
      "17:58:22 [DEBUG] evaluate generation 5682: reward = 294.26, steps = 1357\n",
      "17:58:37 [DEBUG] evaluate generation 5683: reward = 299.75, steps = 1264\n",
      "17:58:50 [DEBUG] evaluate generation 5684: reward = 296.02, steps = 1350\n",
      "17:59:04 [DEBUG] evaluate generation 5685: reward = 296.29, steps = 1333\n",
      "17:59:16 [DEBUG] evaluate generation 5686: reward = 294.58, steps = 1361\n",
      "17:59:28 [DEBUG] evaluate generation 5687: reward = 292.81, steps = 1399\n",
      "17:59:41 [DEBUG] evaluate generation 5688: reward = 296.65, steps = 1330\n",
      "17:59:52 [DEBUG] evaluate generation 5689: reward = 293.61, steps = 1388\n",
      "18:00:06 [DEBUG] evaluate generation 5690: reward = 292.19, steps = 1421\n",
      "18:00:20 [DEBUG] evaluate generation 5691: reward = 292.45, steps = 1402\n",
      "18:00:33 [DEBUG] evaluate generation 5692: reward = 291.43, steps = 1453\n",
      "18:00:47 [DEBUG] evaluate generation 5693: reward = 296.07, steps = 1351\n",
      "18:01:00 [DEBUG] evaluate generation 5694: reward = 293.59, steps = 1418\n",
      "18:01:14 [DEBUG] evaluate generation 5695: reward = 296.04, steps = 1361\n",
      "18:01:29 [DEBUG] evaluate generation 5696: reward = 293.52, steps = 1367\n",
      "18:01:44 [DEBUG] evaluate generation 5697: reward = 294.91, steps = 1339\n",
      "18:01:59 [DEBUG] evaluate generation 5698: reward = 295.63, steps = 1377\n",
      "18:02:13 [DEBUG] evaluate generation 5699: reward = 296.44, steps = 1307\n",
      "18:02:27 [DEBUG] evaluate generation 5700: reward = 294.54, steps = 1352\n",
      "18:02:39 [DEBUG] evaluate generation 5701: reward = 293.41, steps = 1356\n",
      "18:02:53 [DEBUG] evaluate generation 5702: reward = 295.89, steps = 1323\n",
      "18:03:06 [DEBUG] evaluate generation 5703: reward = 290.55, steps = 1435\n",
      "18:03:20 [DEBUG] evaluate generation 5704: reward = 291.74, steps = 1400\n",
      "18:03:35 [DEBUG] evaluate generation 5705: reward = 291.76, steps = 1420\n",
      "18:03:49 [DEBUG] evaluate generation 5706: reward = 292.50, steps = 1389\n",
      "18:04:02 [DEBUG] evaluate generation 5707: reward = 292.82, steps = 1403\n",
      "18:04:16 [DEBUG] evaluate generation 5708: reward = 295.79, steps = 1366\n",
      "18:04:30 [DEBUG] evaluate generation 5709: reward = 299.10, steps = 1260\n",
      "18:04:43 [DEBUG] evaluate generation 5710: reward = 295.57, steps = 1320\n",
      "18:04:56 [DEBUG] evaluate generation 5711: reward = 294.57, steps = 1349\n",
      "18:05:10 [DEBUG] evaluate generation 5712: reward = 295.71, steps = 1339\n",
      "18:05:24 [DEBUG] evaluate generation 5713: reward = 294.53, steps = 1353\n",
      "18:05:36 [DEBUG] evaluate generation 5714: reward = 298.59, steps = 1280\n",
      "18:05:49 [DEBUG] evaluate generation 5715: reward = 294.88, steps = 1372\n",
      "18:06:02 [DEBUG] evaluate generation 5716: reward = 292.82, steps = 1368\n",
      "18:06:14 [DEBUG] evaluate generation 5717: reward = 296.87, steps = 1293\n",
      "18:06:27 [DEBUG] evaluate generation 5718: reward = 292.28, steps = 1385\n",
      "18:06:41 [DEBUG] evaluate generation 5719: reward = 295.75, steps = 1329\n",
      "18:06:53 [DEBUG] evaluate generation 5720: reward = 290.85, steps = 1457\n",
      "18:07:07 [DEBUG] evaluate generation 5721: reward = 294.65, steps = 1363\n",
      "18:07:21 [DEBUG] evaluate generation 5722: reward = 295.32, steps = 1325\n",
      "18:07:34 [DEBUG] evaluate generation 5723: reward = 295.08, steps = 1353\n",
      "18:07:48 [DEBUG] evaluate generation 5724: reward = 293.89, steps = 1361\n",
      "18:07:59 [DEBUG] evaluate generation 5725: reward = 295.63, steps = 1345\n",
      "18:08:13 [DEBUG] evaluate generation 5726: reward = 296.35, steps = 1312\n",
      "18:08:25 [DEBUG] evaluate generation 5727: reward = 293.11, steps = 1393\n",
      "18:08:39 [DEBUG] evaluate generation 5728: reward = 298.02, steps = 1291\n",
      "18:08:52 [DEBUG] evaluate generation 5729: reward = 295.85, steps = 1341\n",
      "18:09:05 [DEBUG] evaluate generation 5730: reward = 292.39, steps = 1407\n",
      "18:09:18 [DEBUG] evaluate generation 5731: reward = 295.50, steps = 1339\n",
      "18:09:31 [DEBUG] evaluate generation 5732: reward = 295.43, steps = 1346\n",
      "18:09:44 [DEBUG] evaluate generation 5733: reward = 293.78, steps = 1376\n",
      "18:09:57 [DEBUG] evaluate generation 5734: reward = 295.14, steps = 1339\n",
      "18:10:10 [DEBUG] evaluate generation 5735: reward = 294.86, steps = 1376\n",
      "18:10:23 [DEBUG] evaluate generation 5736: reward = 293.44, steps = 1378\n",
      "18:10:37 [DEBUG] evaluate generation 5737: reward = 293.14, steps = 1418\n",
      "18:10:50 [DEBUG] evaluate generation 5738: reward = 293.98, steps = 1385\n",
      "18:11:03 [DEBUG] evaluate generation 5739: reward = 295.90, steps = 1343\n",
      "18:11:16 [DEBUG] evaluate generation 5740: reward = 292.14, steps = 1451\n",
      "18:11:30 [DEBUG] evaluate generation 5741: reward = 296.13, steps = 1345\n",
      "18:11:44 [DEBUG] evaluate generation 5742: reward = 297.11, steps = 1331\n",
      "18:11:58 [DEBUG] evaluate generation 5743: reward = 291.00, steps = 1443\n",
      "18:12:11 [DEBUG] evaluate generation 5744: reward = 295.81, steps = 1336\n",
      "18:12:25 [DEBUG] evaluate generation 5745: reward = 293.67, steps = 1383\n",
      "18:12:38 [DEBUG] evaluate generation 5746: reward = 295.24, steps = 1379\n",
      "18:12:53 [DEBUG] evaluate generation 5747: reward = 294.84, steps = 1371\n",
      "18:13:07 [DEBUG] evaluate generation 5748: reward = 294.61, steps = 1360\n",
      "18:13:20 [DEBUG] evaluate generation 5749: reward = 294.79, steps = 1381\n",
      "18:13:31 [DEBUG] evaluate generation 5750: reward = 295.17, steps = 1375\n",
      "18:13:43 [DEBUG] evaluate generation 5751: reward = 295.39, steps = 1348\n",
      "18:13:56 [DEBUG] evaluate generation 5752: reward = 294.75, steps = 1375\n",
      "18:14:10 [DEBUG] evaluate generation 5753: reward = 294.94, steps = 1366\n",
      "18:14:25 [DEBUG] evaluate generation 5754: reward = 294.05, steps = 1389\n",
      "18:14:39 [DEBUG] evaluate generation 5755: reward = 294.99, steps = 1383\n",
      "18:14:52 [DEBUG] evaluate generation 5756: reward = 294.96, steps = 1349\n",
      "18:15:03 [DEBUG] evaluate generation 5757: reward = 295.47, steps = 1362\n",
      "18:15:17 [DEBUG] evaluate generation 5758: reward = 294.08, steps = 1398\n",
      "18:15:31 [DEBUG] evaluate generation 5759: reward = 292.82, steps = 1408\n",
      "18:15:44 [DEBUG] evaluate generation 5760: reward = 293.09, steps = 1390\n",
      "18:15:59 [DEBUG] evaluate generation 5761: reward = 294.20, steps = 1399\n",
      "18:16:13 [DEBUG] evaluate generation 5762: reward = 298.14, steps = 1325\n",
      "18:16:26 [DEBUG] evaluate generation 5763: reward = 296.89, steps = 1359\n",
      "18:16:38 [DEBUG] evaluate generation 5764: reward = 299.80, steps = 1283\n",
      "18:16:51 [DEBUG] evaluate generation 5765: reward = 295.62, steps = 1370\n",
      "18:17:03 [DEBUG] evaluate generation 5766: reward = 294.36, steps = 1394\n",
      "18:17:15 [DEBUG] evaluate generation 5767: reward = 296.18, steps = 1348\n",
      "18:17:30 [DEBUG] evaluate generation 5768: reward = 299.14, steps = 1278\n",
      "18:17:43 [DEBUG] evaluate generation 5769: reward = 298.43, steps = 1309\n",
      "18:17:55 [DEBUG] evaluate generation 5770: reward = 294.85, steps = 1374\n",
      "18:18:10 [DEBUG] evaluate generation 5771: reward = 297.41, steps = 1325\n",
      "18:18:23 [DEBUG] evaluate generation 5772: reward = 297.27, steps = 1311\n",
      "18:18:36 [DEBUG] evaluate generation 5773: reward = 297.47, steps = 1286\n",
      "18:18:49 [DEBUG] evaluate generation 5774: reward = 293.07, steps = 1387\n",
      "18:19:03 [DEBUG] evaluate generation 5775: reward = 297.24, steps = 1307\n",
      "18:19:17 [DEBUG] evaluate generation 5776: reward = 296.94, steps = 1301\n",
      "18:19:30 [DEBUG] evaluate generation 5777: reward = 293.48, steps = 1361\n",
      "18:19:43 [DEBUG] evaluate generation 5778: reward = 294.35, steps = 1364\n",
      "18:19:57 [DEBUG] evaluate generation 5779: reward = 297.39, steps = 1321\n",
      "18:20:10 [DEBUG] evaluate generation 5780: reward = 299.81, steps = 1283\n",
      "18:20:23 [DEBUG] evaluate generation 5781: reward = 296.78, steps = 1299\n",
      "18:20:38 [DEBUG] evaluate generation 5782: reward = 296.46, steps = 1322\n",
      "18:20:52 [DEBUG] evaluate generation 5783: reward = 298.57, steps = 1317\n",
      "18:21:03 [DEBUG] evaluate generation 5784: reward = 296.00, steps = 1333\n",
      "18:21:16 [DEBUG] evaluate generation 5785: reward = 294.04, steps = 1397\n",
      "18:21:30 [DEBUG] evaluate generation 5786: reward = 296.36, steps = 1318\n",
      "18:21:43 [DEBUG] evaluate generation 5787: reward = 296.63, steps = 1325\n",
      "18:21:56 [DEBUG] evaluate generation 5788: reward = 295.55, steps = 1399\n",
      "18:22:10 [DEBUG] evaluate generation 5789: reward = 296.31, steps = 1358\n",
      "18:22:23 [DEBUG] evaluate generation 5790: reward = 296.71, steps = 1318\n",
      "18:22:36 [DEBUG] evaluate generation 5791: reward = 296.74, steps = 1331\n",
      "18:22:48 [DEBUG] evaluate generation 5792: reward = 294.91, steps = 1385\n",
      "18:23:00 [DEBUG] evaluate generation 5793: reward = 297.31, steps = 1356\n",
      "18:23:13 [DEBUG] evaluate generation 5794: reward = 299.39, steps = 1307\n",
      "18:23:25 [DEBUG] evaluate generation 5795: reward = 295.82, steps = 1364\n",
      "18:23:39 [DEBUG] evaluate generation 5796: reward = 298.51, steps = 1350\n",
      "18:23:52 [DEBUG] evaluate generation 5797: reward = 300.00, steps = 1312\n",
      "18:24:06 [DEBUG] evaluate generation 5798: reward = 294.38, steps = 1377\n",
      "18:24:17 [DEBUG] evaluate generation 5799: reward = 295.08, steps = 1376\n",
      "18:24:31 [DEBUG] evaluate generation 5800: reward = 295.88, steps = 1353\n",
      "18:24:41 [DEBUG] evaluate generation 5801: reward = 296.48, steps = 1334\n",
      "18:24:55 [DEBUG] evaluate generation 5802: reward = 296.34, steps = 1365\n",
      "18:25:06 [DEBUG] evaluate generation 5803: reward = 294.38, steps = 1396\n",
      "18:25:18 [DEBUG] evaluate generation 5804: reward = 298.15, steps = 1327\n",
      "18:25:30 [DEBUG] evaluate generation 5805: reward = 297.86, steps = 1323\n",
      "18:25:42 [DEBUG] evaluate generation 5806: reward = 296.87, steps = 1344\n",
      "18:25:55 [DEBUG] evaluate generation 5807: reward = 298.27, steps = 1314\n",
      "18:26:09 [DEBUG] evaluate generation 5808: reward = 295.11, steps = 1394\n",
      "18:26:22 [DEBUG] evaluate generation 5809: reward = 297.32, steps = 1340\n",
      "18:26:34 [DEBUG] evaluate generation 5810: reward = 296.59, steps = 1337\n",
      "18:26:48 [DEBUG] evaluate generation 5811: reward = 294.95, steps = 1372\n",
      "18:27:01 [DEBUG] evaluate generation 5812: reward = 299.12, steps = 1327\n",
      "18:27:15 [DEBUG] evaluate generation 5813: reward = 295.98, steps = 1380\n",
      "18:27:28 [DEBUG] evaluate generation 5814: reward = 296.12, steps = 1354\n",
      "18:27:41 [DEBUG] evaluate generation 5815: reward = 296.50, steps = 1342\n",
      "18:27:54 [DEBUG] evaluate generation 5816: reward = 297.24, steps = 1367\n",
      "18:28:05 [DEBUG] evaluate generation 5817: reward = 297.31, steps = 1347\n",
      "18:28:18 [DEBUG] evaluate generation 5818: reward = 295.90, steps = 1363\n",
      "18:28:33 [DEBUG] evaluate generation 5819: reward = 295.12, steps = 1365\n",
      "18:28:46 [DEBUG] evaluate generation 5820: reward = 294.81, steps = 1363\n",
      "18:28:59 [DEBUG] evaluate generation 5821: reward = 296.54, steps = 1325\n",
      "18:29:13 [DEBUG] evaluate generation 5822: reward = 295.09, steps = 1350\n",
      "18:29:26 [DEBUG] evaluate generation 5823: reward = 293.89, steps = 1398\n",
      "18:29:39 [DEBUG] evaluate generation 5824: reward = 291.44, steps = 1433\n",
      "18:29:53 [DEBUG] evaluate generation 5825: reward = 297.23, steps = 1329\n",
      "18:30:07 [DEBUG] evaluate generation 5826: reward = 293.07, steps = 1409\n",
      "18:30:20 [DEBUG] evaluate generation 5827: reward = 296.03, steps = 1363\n",
      "18:30:33 [DEBUG] evaluate generation 5828: reward = 297.67, steps = 1342\n",
      "18:30:46 [DEBUG] evaluate generation 5829: reward = 293.90, steps = 1382\n",
      "18:31:00 [DEBUG] evaluate generation 5830: reward = 293.68, steps = 1350\n",
      "18:31:13 [DEBUG] evaluate generation 5831: reward = 293.97, steps = 1353\n",
      "18:31:26 [DEBUG] evaluate generation 5832: reward = 290.50, steps = 1429\n",
      "18:31:39 [DEBUG] evaluate generation 5833: reward = 293.36, steps = 1412\n",
      "18:31:52 [DEBUG] evaluate generation 5834: reward = 295.44, steps = 1383\n",
      "18:32:06 [DEBUG] evaluate generation 5835: reward = 295.97, steps = 1371\n",
      "18:32:19 [DEBUG] evaluate generation 5836: reward = 294.05, steps = 1396\n",
      "18:32:33 [DEBUG] evaluate generation 5837: reward = 295.08, steps = 1386\n",
      "18:32:45 [DEBUG] evaluate generation 5838: reward = 295.76, steps = 1365\n",
      "18:32:58 [DEBUG] evaluate generation 5839: reward = 295.01, steps = 1404\n",
      "18:33:10 [DEBUG] evaluate generation 5840: reward = 293.58, steps = 1402\n",
      "18:33:23 [DEBUG] evaluate generation 5841: reward = 294.65, steps = 1409\n",
      "18:33:37 [DEBUG] evaluate generation 5842: reward = 296.21, steps = 1355\n",
      "18:33:49 [DEBUG] evaluate generation 5843: reward = 294.95, steps = 1389\n",
      "18:34:02 [DEBUG] evaluate generation 5844: reward = 296.50, steps = 1376\n",
      "18:34:15 [DEBUG] evaluate generation 5845: reward = 293.58, steps = 1403\n",
      "18:34:27 [DEBUG] evaluate generation 5846: reward = 297.21, steps = 1355\n",
      "18:34:41 [DEBUG] evaluate generation 5847: reward = 293.99, steps = 1434\n",
      "18:34:56 [DEBUG] evaluate generation 5848: reward = 295.87, steps = 1396\n",
      "18:35:09 [DEBUG] evaluate generation 5849: reward = 296.68, steps = 1363\n",
      "18:35:24 [DEBUG] evaluate generation 5850: reward = 296.43, steps = 1347\n",
      "18:35:38 [DEBUG] evaluate generation 5851: reward = 294.37, steps = 1434\n",
      "18:35:52 [DEBUG] evaluate generation 5852: reward = 295.39, steps = 1399\n",
      "18:36:06 [DEBUG] evaluate generation 5853: reward = 295.30, steps = 1388\n",
      "18:36:20 [DEBUG] evaluate generation 5854: reward = 298.35, steps = 1317\n",
      "18:36:34 [DEBUG] evaluate generation 5855: reward = 297.10, steps = 1333\n",
      "18:36:48 [DEBUG] evaluate generation 5856: reward = 295.83, steps = 1372\n",
      "18:37:02 [DEBUG] evaluate generation 5857: reward = 292.11, steps = 1430\n",
      "18:37:16 [DEBUG] evaluate generation 5858: reward = 299.01, steps = 1303\n",
      "18:37:30 [DEBUG] evaluate generation 5859: reward = 299.33, steps = 1289\n",
      "18:37:44 [DEBUG] evaluate generation 5860: reward = 299.57, steps = 1297\n",
      "18:37:58 [DEBUG] evaluate generation 5861: reward = 299.61, steps = 1297\n",
      "18:38:12 [DEBUG] evaluate generation 5862: reward = 299.28, steps = 1292\n",
      "18:38:24 [DEBUG] evaluate generation 5863: reward = 296.72, steps = 1341\n",
      "18:38:37 [DEBUG] evaluate generation 5864: reward = 296.98, steps = 1355\n",
      "18:38:49 [DEBUG] evaluate generation 5865: reward = 294.52, steps = 1371\n",
      "18:39:04 [DEBUG] evaluate generation 5866: reward = 297.67, steps = 1330\n",
      "18:39:16 [DEBUG] evaluate generation 5867: reward = 295.44, steps = 1362\n",
      "18:39:29 [DEBUG] evaluate generation 5868: reward = 296.64, steps = 1367\n",
      "18:39:43 [DEBUG] evaluate generation 5869: reward = 298.83, steps = 1314\n",
      "18:39:55 [DEBUG] evaluate generation 5870: reward = 297.50, steps = 1332\n",
      "18:40:08 [DEBUG] evaluate generation 5871: reward = 295.02, steps = 1372\n",
      "18:40:21 [DEBUG] evaluate generation 5872: reward = 295.44, steps = 1337\n",
      "18:40:35 [DEBUG] evaluate generation 5873: reward = 296.14, steps = 1370\n",
      "18:40:49 [DEBUG] evaluate generation 5874: reward = 299.52, steps = 1282\n",
      "18:41:01 [DEBUG] evaluate generation 5875: reward = 298.15, steps = 1320\n",
      "18:41:14 [DEBUG] evaluate generation 5876: reward = 298.34, steps = 1333\n",
      "18:41:28 [DEBUG] evaluate generation 5877: reward = 296.69, steps = 1355\n",
      "18:41:41 [DEBUG] evaluate generation 5878: reward = 111.66, steps = 896\n",
      "18:41:53 [DEBUG] evaluate generation 5879: reward = 297.74, steps = 1324\n",
      "18:42:08 [DEBUG] evaluate generation 5880: reward = 295.85, steps = 1368\n",
      "18:42:22 [DEBUG] evaluate generation 5881: reward = 298.36, steps = 1313\n",
      "18:42:36 [DEBUG] evaluate generation 5882: reward = 300.12, steps = 1299\n",
      "18:42:47 [DEBUG] evaluate generation 5883: reward = 294.26, steps = 1422\n",
      "18:43:00 [DEBUG] evaluate generation 5884: reward = 297.09, steps = 1360\n",
      "18:43:13 [DEBUG] evaluate generation 5885: reward = 294.86, steps = 1397\n",
      "18:43:27 [DEBUG] evaluate generation 5886: reward = 296.82, steps = 1360\n",
      "18:43:40 [DEBUG] evaluate generation 5887: reward = 295.68, steps = 1376\n",
      "18:43:55 [DEBUG] evaluate generation 5888: reward = 295.99, steps = 1360\n",
      "18:44:08 [DEBUG] evaluate generation 5889: reward = 298.93, steps = 1353\n",
      "18:44:22 [DEBUG] evaluate generation 5890: reward = 295.19, steps = 1365\n",
      "18:44:35 [DEBUG] evaluate generation 5891: reward = 296.42, steps = 1365\n",
      "18:44:49 [DEBUG] evaluate generation 5892: reward = 297.56, steps = 1316\n",
      "18:45:02 [DEBUG] evaluate generation 5893: reward = 297.71, steps = 1329\n",
      "18:45:16 [DEBUG] evaluate generation 5894: reward = 298.08, steps = 1332\n",
      "18:45:29 [DEBUG] evaluate generation 5895: reward = 295.87, steps = 1377\n",
      "18:45:43 [DEBUG] evaluate generation 5896: reward = 294.61, steps = 1379\n",
      "18:45:57 [DEBUG] evaluate generation 5897: reward = 295.93, steps = 1353\n",
      "18:46:09 [DEBUG] evaluate generation 5898: reward = 296.13, steps = 1337\n",
      "18:46:23 [DEBUG] evaluate generation 5899: reward = 295.99, steps = 1366\n",
      "18:46:36 [DEBUG] evaluate generation 5900: reward = 295.79, steps = 1386\n",
      "18:46:50 [DEBUG] evaluate generation 5901: reward = 294.19, steps = 1428\n",
      "18:47:05 [DEBUG] evaluate generation 5902: reward = 296.74, steps = 1330\n",
      "18:47:18 [DEBUG] evaluate generation 5903: reward = 297.52, steps = 1324\n",
      "18:47:32 [DEBUG] evaluate generation 5904: reward = 298.64, steps = 1301\n",
      "18:47:46 [DEBUG] evaluate generation 5905: reward = 299.20, steps = 1286\n",
      "18:47:57 [DEBUG] evaluate generation 5906: reward = 298.13, steps = 1298\n",
      "18:48:10 [DEBUG] evaluate generation 5907: reward = 299.94, steps = 1295\n",
      "18:48:22 [DEBUG] evaluate generation 5908: reward = 302.73, steps = 1235\n",
      "18:48:34 [DEBUG] evaluate generation 5909: reward = 300.65, steps = 1262\n",
      "18:48:47 [DEBUG] evaluate generation 5910: reward = 298.59, steps = 1316\n",
      "18:49:00 [DEBUG] evaluate generation 5911: reward = 299.07, steps = 1286\n",
      "18:49:13 [DEBUG] evaluate generation 5912: reward = 297.56, steps = 1326\n",
      "18:49:26 [DEBUG] evaluate generation 5913: reward = 294.31, steps = 1386\n",
      "18:49:39 [DEBUG] evaluate generation 5914: reward = 295.26, steps = 1367\n",
      "18:49:51 [DEBUG] evaluate generation 5915: reward = 293.20, steps = 1404\n",
      "18:50:03 [DEBUG] evaluate generation 5916: reward = 296.42, steps = 1328\n",
      "18:50:17 [DEBUG] evaluate generation 5917: reward = 295.30, steps = 1382\n",
      "18:50:31 [DEBUG] evaluate generation 5918: reward = 296.02, steps = 1360\n",
      "18:50:44 [DEBUG] evaluate generation 5919: reward = 296.14, steps = 1378\n",
      "18:50:57 [DEBUG] evaluate generation 5920: reward = 294.52, steps = 1385\n",
      "18:51:10 [DEBUG] evaluate generation 5921: reward = 293.89, steps = 1420\n",
      "18:51:25 [DEBUG] evaluate generation 5922: reward = 292.67, steps = 1430\n",
      "18:51:39 [DEBUG] evaluate generation 5923: reward = 297.71, steps = 1334\n",
      "18:51:51 [DEBUG] evaluate generation 5924: reward = 297.32, steps = 1330\n",
      "18:52:04 [DEBUG] evaluate generation 5925: reward = 298.42, steps = 1285\n",
      "18:52:18 [DEBUG] evaluate generation 5926: reward = 298.88, steps = 1293\n",
      "18:52:32 [DEBUG] evaluate generation 5927: reward = 296.46, steps = 1355\n",
      "18:52:44 [DEBUG] evaluate generation 5928: reward = 294.87, steps = 1347\n",
      "18:52:57 [DEBUG] evaluate generation 5929: reward = 298.16, steps = 1337\n",
      "18:53:10 [DEBUG] evaluate generation 5930: reward = 294.16, steps = 1391\n",
      "18:53:24 [DEBUG] evaluate generation 5931: reward = 294.91, steps = 1406\n",
      "18:53:37 [DEBUG] evaluate generation 5932: reward = 298.67, steps = 1334\n",
      "18:53:50 [DEBUG] evaluate generation 5933: reward = 297.80, steps = 1365\n",
      "18:54:04 [DEBUG] evaluate generation 5934: reward = 294.55, steps = 1376\n",
      "18:54:15 [DEBUG] evaluate generation 5935: reward = 295.32, steps = 1370\n",
      "18:54:29 [DEBUG] evaluate generation 5936: reward = 296.34, steps = 1380\n",
      "18:54:42 [DEBUG] evaluate generation 5937: reward = 296.67, steps = 1341\n",
      "18:54:55 [DEBUG] evaluate generation 5938: reward = 297.49, steps = 1322\n",
      "18:55:10 [DEBUG] evaluate generation 5939: reward = 296.73, steps = 1333\n",
      "18:55:23 [DEBUG] evaluate generation 5940: reward = 299.52, steps = 1331\n",
      "18:55:36 [DEBUG] evaluate generation 5941: reward = 299.32, steps = 1305\n",
      "18:55:49 [DEBUG] evaluate generation 5942: reward = 297.04, steps = 1394\n",
      "18:56:02 [DEBUG] evaluate generation 5943: reward = 297.48, steps = 1354\n",
      "18:56:14 [DEBUG] evaluate generation 5944: reward = 292.87, steps = 1414\n",
      "18:56:28 [DEBUG] evaluate generation 5945: reward = 292.86, steps = 1470\n",
      "18:56:42 [DEBUG] evaluate generation 5946: reward = 295.09, steps = 1385\n",
      "18:56:56 [DEBUG] evaluate generation 5947: reward = 298.75, steps = 1359\n",
      "18:57:10 [DEBUG] evaluate generation 5948: reward = 295.02, steps = 1384\n",
      "18:57:24 [DEBUG] evaluate generation 5949: reward = 298.94, steps = 1338\n",
      "18:57:38 [DEBUG] evaluate generation 5950: reward = 293.65, steps = 1419\n",
      "18:57:52 [DEBUG] evaluate generation 5951: reward = 295.97, steps = 1375\n",
      "18:58:06 [DEBUG] evaluate generation 5952: reward = 298.42, steps = 1326\n",
      "18:58:19 [DEBUG] evaluate generation 5953: reward = 297.65, steps = 1349\n",
      "18:58:32 [DEBUG] evaluate generation 5954: reward = 296.27, steps = 1397\n",
      "18:58:46 [DEBUG] evaluate generation 5955: reward = 296.12, steps = 1403\n",
      "18:58:58 [DEBUG] evaluate generation 5956: reward = 298.98, steps = 1350\n",
      "18:59:11 [DEBUG] evaluate generation 5957: reward = 296.90, steps = 1385\n",
      "18:59:26 [DEBUG] evaluate generation 5958: reward = 297.45, steps = 1379\n",
      "18:59:40 [DEBUG] evaluate generation 5959: reward = 295.54, steps = 1414\n",
      "18:59:55 [DEBUG] evaluate generation 5960: reward = 296.70, steps = 1398\n",
      "19:00:09 [DEBUG] evaluate generation 5961: reward = 296.38, steps = 1387\n",
      "19:00:23 [DEBUG] evaluate generation 5962: reward = 297.77, steps = 1337\n",
      "19:00:36 [DEBUG] evaluate generation 5963: reward = 295.50, steps = 1401\n",
      "19:00:49 [DEBUG] evaluate generation 5964: reward = 296.13, steps = 1369\n",
      "19:01:03 [DEBUG] evaluate generation 5965: reward = 298.65, steps = 1331\n",
      "19:01:17 [DEBUG] evaluate generation 5966: reward = 296.79, steps = 1367\n",
      "19:01:31 [DEBUG] evaluate generation 5967: reward = 296.70, steps = 1366\n",
      "19:01:45 [DEBUG] evaluate generation 5968: reward = 298.30, steps = 1314\n",
      "19:01:59 [DEBUG] evaluate generation 5969: reward = 296.73, steps = 1365\n",
      "19:02:13 [DEBUG] evaluate generation 5970: reward = 297.31, steps = 1312\n",
      "19:02:26 [DEBUG] evaluate generation 5971: reward = 298.34, steps = 1311\n",
      "19:02:40 [DEBUG] evaluate generation 5972: reward = 298.19, steps = 1322\n",
      "19:02:52 [DEBUG] evaluate generation 5973: reward = 300.36, steps = 1310\n",
      "19:03:06 [DEBUG] evaluate generation 5974: reward = 298.03, steps = 1337\n",
      "19:03:20 [DEBUG] evaluate generation 5975: reward = 295.84, steps = 1394\n",
      "19:03:32 [DEBUG] evaluate generation 5976: reward = 298.51, steps = 1336\n",
      "19:03:46 [DEBUG] evaluate generation 5977: reward = 298.31, steps = 1339\n",
      "19:03:58 [DEBUG] evaluate generation 5978: reward = 296.18, steps = 1376\n",
      "19:04:11 [DEBUG] evaluate generation 5979: reward = 296.99, steps = 1380\n",
      "19:04:26 [DEBUG] evaluate generation 5980: reward = 297.95, steps = 1353\n",
      "19:04:39 [DEBUG] evaluate generation 5981: reward = 297.32, steps = 1369\n",
      "19:04:53 [DEBUG] evaluate generation 5982: reward = 298.71, steps = 1350\n",
      "19:05:06 [DEBUG] evaluate generation 5983: reward = 298.49, steps = 1349\n",
      "19:05:19 [DEBUG] evaluate generation 5984: reward = 297.48, steps = 1390\n",
      "19:05:33 [DEBUG] evaluate generation 5985: reward = 300.37, steps = 1313\n",
      "19:05:47 [DEBUG] evaluate generation 5986: reward = 299.50, steps = 1340\n",
      "19:06:02 [DEBUG] evaluate generation 5987: reward = 298.07, steps = 1354\n",
      "19:06:15 [DEBUG] evaluate generation 5988: reward = 298.35, steps = 1338\n",
      "19:06:28 [DEBUG] evaluate generation 5989: reward = 299.90, steps = 1329\n",
      "19:06:41 [DEBUG] evaluate generation 5990: reward = 299.56, steps = 1317\n",
      "19:06:54 [DEBUG] evaluate generation 5991: reward = 301.83, steps = 1283\n",
      "19:07:06 [DEBUG] evaluate generation 5992: reward = 301.04, steps = 1308\n",
      "19:07:20 [DEBUG] evaluate generation 5993: reward = 301.56, steps = 1265\n",
      "19:07:34 [DEBUG] evaluate generation 5994: reward = 302.55, steps = 1266\n",
      "19:07:34 [INFO] ==== test ====\n",
      "19:07:35 [DEBUG] test episode 0: reward = 301.18, steps = 1288\n",
      "19:07:36 [DEBUG] test episode 1: reward = 298.62, steps = 1307\n",
      "19:07:36 [DEBUG] test episode 2: reward = 300.47, steps = 1281\n",
      "19:07:37 [DEBUG] test episode 3: reward = 302.68, steps = 1248\n",
      "19:07:38 [DEBUG] test episode 4: reward = 300.17, steps = 1306\n",
      "19:07:39 [DEBUG] test episode 5: reward = 302.07, steps = 1274\n",
      "19:07:39 [DEBUG] test episode 6: reward = 300.19, steps = 1303\n",
      "19:07:40 [DEBUG] test episode 7: reward = 298.52, steps = 1333\n",
      "19:07:41 [DEBUG] test episode 8: reward = 300.80, steps = 1284\n",
      "19:07:42 [DEBUG] test episode 9: reward = 301.59, steps = 1293\n",
      "19:07:42 [DEBUG] test episode 10: reward = 300.77, steps = 1305\n",
      "19:07:43 [DEBUG] test episode 11: reward = 300.82, steps = 1312\n",
      "19:07:44 [DEBUG] test episode 12: reward = 300.89, steps = 1298\n",
      "19:07:45 [DEBUG] test episode 13: reward = 301.28, steps = 1290\n",
      "19:07:45 [DEBUG] test episode 14: reward = 298.41, steps = 1345\n",
      "19:07:46 [DEBUG] test episode 15: reward = 296.75, steps = 1377\n",
      "19:07:47 [DEBUG] test episode 16: reward = 299.65, steps = 1314\n",
      "19:07:48 [DEBUG] test episode 17: reward = 300.17, steps = 1288\n",
      "19:07:48 [DEBUG] test episode 18: reward = 299.81, steps = 1308\n",
      "19:07:49 [DEBUG] test episode 19: reward = 299.65, steps = 1338\n",
      "19:07:50 [DEBUG] test episode 20: reward = 299.86, steps = 1302\n",
      "19:07:51 [DEBUG] test episode 21: reward = 300.82, steps = 1298\n",
      "19:07:51 [DEBUG] test episode 22: reward = 302.72, steps = 1269\n",
      "19:07:52 [DEBUG] test episode 23: reward = 300.71, steps = 1287\n",
      "19:07:53 [DEBUG] test episode 24: reward = 302.44, steps = 1293\n",
      "19:07:54 [DEBUG] test episode 25: reward = 298.99, steps = 1319\n",
      "19:07:54 [DEBUG] test episode 26: reward = 302.86, steps = 1256\n",
      "19:07:55 [DEBUG] test episode 27: reward = 299.71, steps = 1293\n",
      "19:07:56 [DEBUG] test episode 28: reward = 300.15, steps = 1307\n",
      "19:07:57 [DEBUG] test episode 29: reward = 301.43, steps = 1302\n",
      "19:07:57 [DEBUG] test episode 30: reward = 299.57, steps = 1324\n",
      "19:07:58 [DEBUG] test episode 31: reward = 302.86, steps = 1233\n",
      "19:07:59 [DEBUG] test episode 32: reward = 301.33, steps = 1302\n",
      "19:08:00 [DEBUG] test episode 33: reward = 300.87, steps = 1301\n",
      "19:08:00 [DEBUG] test episode 34: reward = 299.52, steps = 1322\n",
      "19:08:01 [DEBUG] test episode 35: reward = 302.25, steps = 1283\n",
      "19:08:02 [DEBUG] test episode 36: reward = 301.78, steps = 1247\n",
      "19:08:02 [DEBUG] test episode 37: reward = 301.74, steps = 1260\n",
      "19:08:03 [DEBUG] test episode 38: reward = 301.32, steps = 1276\n",
      "19:08:04 [DEBUG] test episode 39: reward = 301.27, steps = 1263\n",
      "19:08:05 [DEBUG] test episode 40: reward = 302.27, steps = 1266\n",
      "19:08:05 [DEBUG] test episode 41: reward = 298.95, steps = 1314\n",
      "19:08:06 [DEBUG] test episode 42: reward = 297.78, steps = 1335\n",
      "19:08:07 [DEBUG] test episode 43: reward = 299.95, steps = 1300\n",
      "19:08:08 [DEBUG] test episode 44: reward = 301.37, steps = 1291\n",
      "19:08:08 [DEBUG] test episode 45: reward = 300.67, steps = 1291\n",
      "19:08:09 [DEBUG] test episode 46: reward = 301.68, steps = 1300\n",
      "19:08:10 [DEBUG] test episode 47: reward = 299.02, steps = 1338\n",
      "19:08:11 [DEBUG] test episode 48: reward = 299.65, steps = 1337\n",
      "19:08:11 [DEBUG] test episode 49: reward = 300.39, steps = 1309\n",
      "19:08:12 [DEBUG] test episode 50: reward = 299.65, steps = 1324\n",
      "19:08:13 [DEBUG] test episode 51: reward = 300.96, steps = 1291\n",
      "19:08:14 [DEBUG] test episode 52: reward = 300.08, steps = 1326\n",
      "19:08:14 [DEBUG] test episode 53: reward = 302.63, steps = 1245\n",
      "19:08:15 [DEBUG] test episode 54: reward = 303.21, steps = 1273\n",
      "19:08:16 [DEBUG] test episode 55: reward = 299.81, steps = 1300\n",
      "19:08:17 [DEBUG] test episode 56: reward = 299.76, steps = 1310\n",
      "19:08:17 [DEBUG] test episode 57: reward = 302.05, steps = 1281\n",
      "19:08:18 [DEBUG] test episode 58: reward = 299.68, steps = 1316\n",
      "19:08:19 [DEBUG] test episode 59: reward = 304.62, steps = 1239\n",
      "19:08:20 [DEBUG] test episode 60: reward = 302.48, steps = 1285\n",
      "19:08:20 [DEBUG] test episode 61: reward = 300.83, steps = 1295\n",
      "19:08:21 [DEBUG] test episode 62: reward = 300.80, steps = 1307\n",
      "19:08:22 [DEBUG] test episode 63: reward = 302.88, steps = 1264\n",
      "19:08:22 [DEBUG] test episode 64: reward = 300.92, steps = 1284\n",
      "19:08:23 [DEBUG] test episode 65: reward = 300.47, steps = 1320\n",
      "19:08:24 [DEBUG] test episode 66: reward = 301.43, steps = 1275\n",
      "19:08:25 [DEBUG] test episode 67: reward = 300.30, steps = 1302\n",
      "19:08:25 [DEBUG] test episode 68: reward = 299.35, steps = 1342\n",
      "19:08:26 [DEBUG] test episode 69: reward = 301.92, steps = 1293\n",
      "19:08:27 [DEBUG] test episode 70: reward = 301.02, steps = 1300\n",
      "19:08:28 [DEBUG] test episode 71: reward = 302.69, steps = 1268\n",
      "19:08:28 [DEBUG] test episode 72: reward = 299.04, steps = 1337\n",
      "19:08:29 [DEBUG] test episode 73: reward = 300.01, steps = 1324\n",
      "19:08:30 [DEBUG] test episode 74: reward = 303.17, steps = 1255\n",
      "19:08:30 [DEBUG] test episode 75: reward = 301.06, steps = 1313\n",
      "19:08:31 [DEBUG] test episode 76: reward = 301.29, steps = 1306\n",
      "19:08:32 [DEBUG] test episode 77: reward = 300.51, steps = 1299\n",
      "19:08:33 [DEBUG] test episode 78: reward = 302.35, steps = 1276\n",
      "19:08:33 [DEBUG] test episode 79: reward = 300.59, steps = 1298\n",
      "19:08:34 [DEBUG] test episode 80: reward = 301.29, steps = 1267\n",
      "19:08:35 [DEBUG] test episode 81: reward = 300.78, steps = 1280\n",
      "19:08:36 [DEBUG] test episode 82: reward = 301.19, steps = 1301\n",
      "19:08:36 [DEBUG] test episode 83: reward = 302.41, steps = 1274\n",
      "19:08:37 [DEBUG] test episode 84: reward = 300.37, steps = 1308\n",
      "19:08:38 [DEBUG] test episode 85: reward = 302.24, steps = 1258\n",
      "19:08:39 [DEBUG] test episode 86: reward = 301.79, steps = 1291\n",
      "19:08:39 [DEBUG] test episode 87: reward = 299.55, steps = 1290\n",
      "19:08:40 [DEBUG] test episode 88: reward = 301.14, steps = 1264\n",
      "19:08:41 [DEBUG] test episode 89: reward = 298.88, steps = 1322\n",
      "19:08:41 [DEBUG] test episode 90: reward = 300.41, steps = 1312\n",
      "19:08:42 [DEBUG] test episode 91: reward = 299.78, steps = 1327\n",
      "19:08:43 [DEBUG] test episode 92: reward = 301.35, steps = 1301\n",
      "19:08:44 [DEBUG] test episode 93: reward = 299.24, steps = 1343\n",
      "19:08:44 [DEBUG] test episode 94: reward = 303.10, steps = 1254\n",
      "19:08:45 [DEBUG] test episode 95: reward = 302.52, steps = 1284\n",
      "19:08:46 [DEBUG] test episode 96: reward = 302.86, steps = 1263\n",
      "19:08:46 [DEBUG] test episode 97: reward = 301.20, steps = 1282\n",
      "19:08:47 [DEBUG] test episode 98: reward = 300.86, steps = 1294\n",
      "19:08:48 [DEBUG] test episode 99: reward = 299.66, steps = 1311\n",
      "19:08:48 [INFO] average episode reward = 300.85 ± 1.34\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAD4CAYAAAAEhuazAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAg2klEQVR4nO3deXhV5bn+8e+TkAQSCFMSCGEIoxWQoY0oBUVEC07FDiraFk/rKR0cWtvTFmp7bP2V1tbaeur0q9rBnrYqbR2rVRGhastgQFBmgiAEkBCQIcSEDO/5Yy9iEhJIsoe1917357q4ste71/C8Ae699rsmc84hIiLBkuJ3ASIiEnsKfxGRAFL4i4gEkMJfRCSAFP4iIgHUye8C2ionJ8cVFhb6XYaISEJZuXJluXMut3l7woR/YWEhxcXFfpchIpJQzOydlto17CMiEkAKfxGRAFL4i4gEkMJfRCSAFP4iIgGk8BcRCaCww9/MOpvZCjNbY2brzOyHXnsvM1toZlu8nz0bLTPPzErMbJOZTQ+3BhERaZ9I7PlXA+c758YC44AZZnY2MBdY5JwbDizypjGzkcAsYBQwA7jPzFIjUIeISEJbse0Am9490jD9Vukh/nfZO1TV1EV8W2GHvwup8CbTvD8OmAk87LU/DFzuvZ4JPOqcq3bObQNKgAnh1iEiEo/q65s+M6Wqpo6j1bWUHalq0r51XwVX/nop0+96BYD3jh7jsnte4/tPriXFLOJ1ReQKX2/PfSUwDLjXObfczPo45/YAOOf2mFmeN3sBsKzR4qVem4iIL5a/vZ/eXTMYlteV+npHbb0jvVPH9o2dc5SUVTAsryv3LdnKHS9swgwuOL0P3/zYCGbc9WrDvFcW9Wf6qL5c93DTuxcUzn22yXRHazmZiIS/c64OGGdmPYAnzGz0SWZv6SOsxceJmdkcYA7AwIEDwy1TRJKEcw47xd7w82v3MKBXJj0y0yno0aXFed7eV8Gug+/zud+sAGD77Zfwg2fW8Yel77D8u9Ood467Xy7hhx8fRVpqCmVHqshITWX6Xa9w16xxPLpiB0+u3g1Az8w0LhmTzx+X7WihXli4fi8L1+9t0r6guJQFxaUd+RWEzSL9GEczuxU4CnwROM/b688HljjnTjOzeQDOuZ94878A/MA5t/Rk6y0qKnK6t4+I/O+yd/j+k2sB+I+PFpLTNZ2fv7iZP3xhAvuPVjP1tDwMY+xtLzYss/XHF7N1XwUf++UrHd5ududOHK6qDbv+9vrZp8Zw5ZkDOry8ma10zhWd0B5u+JtZLlDjnDtoZl2AF4GfAlOA/c65281sLtDLOfdtMxsF/JnQOH8/QgeDh3vfHlql8BdJTnsPV2FAXnbnJu1FP3qJ8opqtv3kYp5cvYubH1vDjFF9eX7du+3eRq+sdA4cPRahimNn8X+dx+CcrLDW0Vr4R2LYJx942Bv3TwEWOOf+bmZLgQVmdh2wA7gCwDm3zswWAOuBWuD6UwW/iCSvs368CAgNuRy370g15RXVALy8sYybH1sD0KHgB+Iu+G+5+HSmj+rLuXcsbtJ+x6fHcO/iEs4dkcuM0X3DDv6TCTv8nXNvAuNbaN8PTGtlmfnA/HC3LSLJ6fFVH4yDb9p75CRzJoYJg3sxZUQued0yuHx8AWmpoQO437xwBN0z01i4fi+vbiknL7szS741NSY16QpfEYk7jY/lPvnGrg6v59Ix+U2mr586lHkXfQiAL54zmOduOgeAPtkZTebbcNsMNv6/GZxR0L1J+yNfPJucrqF5C3p04Z5rxjdMAwzoFTqwfNP5wxrazh7SiwVfmsj1U4dxRdGAhuAHuHHacGZPLOxw/8KRMA9zEZHgaHxe++a9FSeZMyQ9NYXN8y864RTJ/5k1ngtO78PXH1vNyu9dQG8vqL80ZWjDPMeHm+5fspWfPr+RL00ZQpf00HWnz9w4mZn3/os1Ow8CMHFob5bOO597Xi7hC5MH071LGucMz+VodS39mp1RdPbQ3lzz4PL2dz5GFP4ikvCevWlyq+9dPr6Ay8dH7lKitNQUbr5wRMN09y5pdO+SFrH1x4qGfUQk4UXhAtgPdPSMyMieRR9xCn8RiSsLXt/Jj57d0M6lopn+yUnhLyJx4Wh16AKqb//tzXYvm9JK9kfkI6GjXyvi/PNI4S8icWHUrS+ccBO0tspOwDF3vyn8RSRu1HYw/Bufbilto/AXkbhRH+F7jUnrdKqniPjmzdKDTabfPVTV8oytGNO/O53TIvwsqIB8/ij8RcQ3H7/nX02mz/v5knYtf/OFI5h6Wt6pZ2yDqJ4uGoc07CMivnhl876ob6M9gR60ESeFv4jEnHOO2b9d0eHlzx2RG8FqmgnINwCFv4jE3CtbysNaPiD5HFUKfxGJqWO19Vzbxr3+gb0ym0wPy+vK+tumR6OswFH4i0hMHaurb/O8zR9c3r1LGpnpOk8lEvRbFJGYufLXS5k5rl+b529+xW/PzPRIl3RqSXokWOEvIjHhnGPFtgOs2Hagzcv0ykrn7fKjDdM/v2JMNEoLJA37iEjUrdl5kKfX7G73cs3vw9+jnXv+FomT95P0AgDt+YtIVJRXVJOWmsJld7/GjgOVHVrHpz/SnyuLBjDie/+IcHWi8BeRsJUdqWLanf9k2bxpZGWEYqXoRy+RlZ7K0WN1Ya37+EHftNTk3AP3i4Z9RCRs0+78J0eqarn4V68CsM0bpw83+I8fa13+3Wm8fssFYa1LmtKev4iE7UhV6EEs7+yvpKK6ls/9JjIPLu+cFto/7ZPdOSLra5PmJ/ck6dk+2vMXkYgafesLHH6/psPLD8/r2vA6IgdspUUKfxGJuKratl/IBTB74iAARvTpysJvTIlGSW3X/PMmST+AFP4iEnHH2hn+nzlrUJQqkdYo/EVEAkgHfEWkQ2rr6nm7/ChPr27/xVun8pcvT+RQZcePG8ipKfxFpEN+/NxGfvuvbR1e/o/XncVnWzkr6MzCXh1eb8TpbB8RkQ+EE/wAk4fnRKgS6QiFv4i028HKY2EtP6pfdoQqiYEkPdtHwz4i0mafeWgZpe+9z/C8bmGtJ797DC/akhZpz19E2uxfJft5Z38lL23Y2+ZlhuRk8ZcvT2zWmpx704lE4S+SgI5U1VA491n+vHxHQ9vdi7bw0Z8sito2mz9Ypa2+PGUoZxR0b9J2tLq2ybQ74Z4KEm0Kf5EEtOdQFQC/a3TQ9c6Fm9nttUfSofdr+K+/rOH5de92eB1pqU2jZunb+8MtK3oC8jmk8BdJQFU1obtlbimroHDuszy+qjQi631q9S6eafbQlbE/fJG/rizlq39a1bGVGqSmGM/cMJlJw3q3Mov/w0BJely3VQp/kQT0/afWNZn+xoI1Da9f21Le4fV+7dHV3PjIGw3Ta3cd6vC6mjujf3d/nsHbRhE/nT/Ov0Eo/EUSTNmRKtbvbj2UW7twqr2Wvb2fS+9+LSLrOu6qMwcAMHZAjybtcTXmH5BvAGGHv5kNMLPFZrbBzNaZ2de89l5mttDMtng/ezZaZp6ZlZjZJjObHm4NIkEyYf4iauraHpZPvrHrhAOsbfHQq+FdxNWgUakDe2UC8LGRfSKz7ngW5x8ikTjPvxb4pnNulZl1A1aa2ULgP4BFzrnbzWwuMBf4jpmNBGYBo4B+wEtmNsI5F94jf0SkiU3vHmH6Xa8A8MnxBfziqnHtWr49p3O21aDeWSyddz59ujU9zz8exvyDJuw9f+fcHufcKu/1EWADUADMBB72ZnsYuNx7PRN41DlX7ZzbBpQAE8KtQ0SaWrXjvYbX7x5u31lART9a2K75Mzq1PUryu3chJUVh77eIXuFrZoXAeGA50Mc5twdCHxBmlufNVgAsa7RYqdfW0vrmAHMABg4cGMlSRZJaXb3j9e0HGqYrW3mW7sHKY2wrP8r4gT2btJdXtO32DQ9/YQLD87rSr0cXCuc+2/JMbcj5uBrzD4iIHfA1s67A34CvO+cOn2zWFtpa/Jt3zj3gnCtyzhXl5uZGokyRQPjZ8xt5fNWuhunVOw82nB7a2NUPLucT9/2bR1bsOOG9tpgyIpd+Pbp0uM7mNPwTOxEJfzNLIxT8f3LOPe417zWzfO/9fKDMay8FBjRavD8Q+RuCiwTYyxvLTmg7/pD1xjbsCe2nzXv8LXbsrwxrmzldwz+NU98AYicSZ/sY8Btgg3PuF43eehq41nt9LfBUo/ZZZpZhZoOB4cCKcOsQkZOrrT/5oxXPvWNxu9Z36Zj8JtNP3TC55RnbkOfa44+9SOz5TwI+B5xvZqu9PxcDtwMXmtkW4EJvGufcOmABsB54HrheZ/qIRNaWsooT2raVH43Y+mdPHMT/zBrfpK2gRxf+/2c/3KH1aY8/9sI+4Ouce43WD+lMa2WZ+cD8cLctIm13zYPL2X77Je1eLrtzJ3551Tiue7iYX141lkvO6EdaqmEt3A9hxuj8E1fQjp16fQOIHd3PXyRB1NU77n55S1jreHnjXqpr6umSnkput4w2LfPmD0LXYXbkg6O94uIbQByUEAsKf5EE8dxbe7jrpfDC/wu/L45QNZGlPf7Y0719RBJETd3JD9jGrTbsScfFHv9xAfkcUviLSNzQN4DYUfiLSNyIq28ASU7hLyLR1Yadee3xx57CX0R8F9d7/BF/ykt8UPiLSNzQN4DYUfiLSHQl+o5zkj7cV+EvInEjrod/kozCX0QkgBT+IhJdurdPXFL4i4g0FpCRJ4W/iMQNP8f8k/S4bqsU/iISXQmyJ52kp/O3SuEvInEjLsb846CEWFD4i0h0BSRME43CX0Tihs7zjx2Fv4hIACn8RSRuxMWYf0Ao/EUkujSSE5cU/iIiAaTwF5HoasNITt/szgBcUdQ/ysXEUJx/4+nkdwEiIj0y09n644tJ0ZB/zCj8RSQupCZb8sd5dzTsIyLRFefDHydItHo7SOEvIhJACn8Ria44H/44QaLV20EKfxGRAFL4i4gEkMJfRCSAFP4iEl0JfvZMTla63yVEhcJfROQk7rxyrN8lRIXCX0SiK8HPnumRqT1/EYmxvYerKJz7LAuKd/pdiiQZhb9IHNu6rwKAx1eV+lxJYrnl4tN54qsf9buMNkux2H89UviLxLMEP1jql55Z6Ywf2NPXGk7vmw3AF88Zcsp5f/qpMcyeOIhJQ3tHu6wGurGbSALQE64ST8+sdLbffkmb5u3bvTO3zRwd5Yqaisiev5n91szKzGxto7ZeZrbQzLZ4P3s2em+emZWY2SYzmx6JGkREIiIg37YiNezze2BGs7a5wCLn3HBgkTeNmY0EZgGjvGXuM7PUCNUhklQCkkPig4iEv3PuFeBAs+aZwMPe64eByxu1P+qcq3bObQNKgAmRqEMkWflwPDC4AvK7juYB3z7OuT0A3s88r70AaHzeWqnXdgIzm2NmxWZWvG/fviiWKiISLH6c7dPS52qL326dcw8454qcc0W5ublRLktEoiIKY1cXnB7alxzcOyvyKw+IaJ7ts9fM8p1ze8wsHyjz2kuBAY3m6w/sjmIdIgnLadC/RZ89exAzxxeQ3TnN71ISVjT3/J8GrvVeXws81ah9lpllmNlgYDiwIop1iCS8hB7zj0LtZqbgD1NE9vzN7BHgPCDHzEqBW4HbgQVmdh2wA7gCwDm3zswWAOuBWuB651xdJOoQEZG2iUj4O+eubuWtaa3MPx+YH4ltiyQzp5M9JUp0eweRBBDrK3wvH9cvptuT2FP4i8Qxvw74njm4lz8blphR+IuIBJBu7CYi0lgL37aevmESOV0zYl9LFCn8RRJAQp/qmQTG9O/hdwkRp2EfkTj2+vbQLbN0sVcMBeSDVuEvEsfufrkEgOra2F4Kow+b5KfwF0kAifwwl5H52X6XIC1Q+IvICSJ5jGF0QffIrUwiRuEvIhJACn8RkQBS+IskgsQd8pc4pfAXEQkghb+ISAAp/EUSgEZ9JNIU/iIiAaTwF0kAuuA2hgLyy1b4i8SBR1bsYNfB9/0uo0E4t3eY/4nRPHfTOZErRqJCd/UU8dnhqhrmPf4Whb0zWfKtqX6XE7bPnDXI7xLCE5ADLNrzF/GZqw/9PHD02Elmin4dN0wd1vBat5BOfgp/EQFg1oQBfpcgMaTwFxEJIIW/iEgAKfxF5ATjBvTwuwSJMoW/SAJwMTz5vE92BqP66R78yU7hLyJNdEpRLASBzvMXiUP19Y77lpQ0TFdU11FTV+9jRZJs9BEvEoeWbC7j5y9ubpjesOcw3/nbWz5WJMlG4S8Sh47VBuQGM+Ibhb+ISAAp/EUECO9mbpJ4FP4iIgGk8BcRQDdzCxqFv4hIACn8ReKSBuAluhT+IiIBpPAXiUsagJfoUviLxIkj1bWs233Im4r9sI9O9QwW38LfzGaY2SYzKzGzuX7VIRIvnINLfvWa32VIQPgS/maWCtwLXASMBK42s5F+1CIiMOfcITrVM2D82vOfAJQ45952zh0DHgVm+lSLSOBNGpbjdwkR8ckPF/hdQsLwK/wLgJ2Npku9tibMbI6ZFZtZ8b59+2JWnEhztzzxFnP/9mZMtlVRXRuT7SSjT47vD0DRoJ4dX0lAjn34Ff4tfcE84VfunHvAOVfknCvKzc2NQVkiH6iqqWPH/koA/rR8B4++vvMUS0TG6Ftf4P4lW2OyrWQzeXgO22+/hMKcLL9LiXt+hX8pMKDRdH9gt0+1iLTopkfe4Nw7FvvyEJU1pYdOPZNER0COffgV/q8Dw81ssJmlA7OAp32qRaRFSzaFhhrrdQ6kJCFfHuPonKs1sxuAF4BU4LfOuXV+1CIiIb2y0gH4/KRCfwuRmPDtGb7OueeA5/zavki8eGtXfAzxZKZ3Yvvtl/hdhsSIrvAV8dm9i0tOPZNIhCn8RUQCSOEv4rPq2jq/S5AAUviL+GzVjoN+lyAB5NsBXxGBN3a853cJUZOVnspVZw70uwxphcJfOuTfW8sZ078HXTP0Tygc6/cc9nX75wzP4dUt5VFZ97rbZkRlvRIZGvaRdis7XMU1Dy7n64+u9ruUhGdBuZxU4o7CX9rt/ZrQAcrNe4/4XImIdJTCX+QUdHcHSUYKfxGRAFL4i7TCxeDG7np6lvhF4S8iEkAKf5FWxOJMHO34i18U/iKnoAO+kowU/tJuCsPI+ePyd/wuQQJK4S/ik0Ub9rJ2l79X+EpwKfyl3YJ2hkq0zvr5wTP+Przu+qlDfd2++EvhL+KTssPVvm7/W9M/5Ov2xV8KfxGfxMM3qC9MHgzAqH7ZPlcisaZbMoqcQrQOcMfDTd2mnpan5/YGlMJfxAcLXt/ZcIM8iQ+zJw7i7X0VfPW8YX6XEhMKfxEf/PKlzX6XIM1kZXTijivG+l1GzGjMX+QUojHqs+dQVRTWKtJ2Cn8RkQDSsI9IjBypqqG2zlGpsX6JAwp/kVNwETrd56wfL6LyWB1XFQ2IyPqSwUvfOJfMdMWQH/RbF4mRymOhPf7Hinf6XAk8OLuoTfP1ze4c1TqG5XWL6vqldQp/aTfd2C1xvXjzuWSmp9K/Z+Yp531sztkMzs2KQVXiB4W/yCkk02fdiD5t39M+a0jvKFYiftPZPtJu8XBbgkRx/HjB/gp/7+Mj0pz2/EWi4FBlDTc8sopXt5Rz99XjufGRN3yt5/uXjuTTH+nvaw0SXxT+IqfQ3mMcizeV8fnfvd4w/d9PrY1wRe2X2y2D7l3S/C5D4ojCXyQC1u46xMfveY36Fj4o3qusifj2HpxdxBf/UNymeX9x5VguG5Mf8RoksSn8A8Y5h5lRV++ora/noVe3cf+SrQzN68qanQeZOKQ3S9/e3+KyZxR0561dh+idlQ7AjgOVPL1mN+t2H2LfkWo6pRh9u3dhTEF3Fm8q40/Ld7S4no+N7ENFdS0Th/TmzoWbKRrUEwesfOe9DvUpNSXUn2gZ+8MXG14Xzn02attpjwtH9jnlPMXfu4CcrhkxqEYSkcI/gVTV1DHxJ4uisie5ZudBgFaDH+CtXYcA2H/0WEPbTR0Yy35x/V4A/r01tK3iDob+cdEM/kh5aHYRb+x8j1c2l/PMjZMBqKmrZ/gt/4jaNrM7a5hHWqfwj2Pbyo9S2DuTmjpHisHUny+JSvAnokvH5FNSVsHGd4+c8N7XLxjOXS9tAeC803Lp0SWNC0f25fo/r+JLU4ZQXVPPX1eWUlFdy+yJg5h6Wh7D8rqSkmLcv6SEi8/I55XN5fTJzuCc4Tk4FzrD6dD7NfTMTOe9yhrKK6r5w9Lt9MnuzDUTBpKZ3onXtx+g8lgdw/O6MrB3Jmt3HaLyWB2ThuUwOCeLC0b24VvTP6gzLTWF/O6duW7yYH707IY2933ah/IA+Mp5Q3lkxQ4OtvJvIkVnZclJWKQuXY+2oqIiV1zctjHOxmrr6tn47hEG9s6M6J7QguKdfPuvbzIkN4v87p359Ef6c9HofDqnpbLp3SPc9dJmvnLeUEb3605KilFRXUuXtFQAjtXW0yU99Pr4MMzxWod1cE/wl1eN5Xf/2s6bpYdYNm8a/1i7hxQzCnp0YUCvTF7dso8UMz4/qRAzY8mmMu5bvJXX3znAhttm0Nmrbc+h90lNMfK6fXBl53Nv7SGvWwZvlx/lyjbemuBQZQ3v19SR3imF2rp68rI7U11bx2tbyika1IusjFTeq6whKyOVjE6plJRVUFVTx/b9R5k0LIe01BRSDFLMyExP5d9b91Nb75gyIveEbZWUVTA0N4vDVbWkp6Y0/G4TSV2941htPakpxuJNZXTL6ETlsToqa+rYd6SarPRUigp70SnFKMxpeuHVq1v28bnfrABCQ2o9M9O55dLTtecvAJjZSufcCZd0J334j7vtxZPuGTUfMfjmhSOYNWEgNz+2mi9PGYrDkZmeyqfuX9qRsvnPyYN56LVtHVq2re65ZjyXjukX1W1IfDt+LEJP5ZLmWgv/sIZ9zOwK4AfA6cAE51xxo/fmAdcBdcBNzrkXvPaPAL8HugDPAV9zUfwE+sxZA7l38dYW32tpqPjOhZu5c2HoQRuvlZSHvf1wgv+jQ3vz7637uWnacG48fxjPr32XPy57h5njCuiTncG000990E9EpCXhjvmvBT4J/Lpxo5mNBGYBo4B+wEtmNsI5VwfcD8wBlhEK/xlA1I56fXnK0Ibw/9mnx/DEql3ccP4wPvPQ8hPmTU0xxvbvzqodB1td32vfmcqOA5Xs2F/JpGE55GVnkJ6aQm29Y/XOg6zffZi01BQ+Mb6APYfe5/w7/wnAlBG5/HPzvpPWOnNcP75/6cgmZ2jU1NWTlhq6EPuysf24bKz28EUkfGGFv3NuA9AwZt3ITOBR51w1sM3MSoAJZrYdyHbOLfWW+wNwOVEM/5RGtV1ZNKBhzLrx1+O9h6soKatg0rCcVtdz/Gt176wM+vfM5KNDm76flmqcWdiLMwt7NbQNye0a9tfw48EvIhJJ0Trbp4DQnv1xpV5bjfe6eXuLzGwOoW8JDBw4sEOFpLThRjR9sjvTp423rtV9bUQkGZwy/M3sJaBvC2/d4px7qrXFWmhzJ2lvkXPuAeABCB3wPUWpLReisBYROcEpw985d0EH1lsKND4nsD+w22vv30J71LRlz7899GEiIskgWgPKTwOzzCzDzAYDw4EVzrk9wBEzO9tCBwpmA619e4iISF/oYi1+eRERSSxhhb+ZfcLMSoGJwLNm9gKAc24dsABYDzwPXO+d6QPwFeAhoATYShQP9kLk9/x11aSIJINwz/Z5AniilffmA/NbaC8GRoez3fZIiXBat3Bmk0hc6Nc9us/bleSie/u0k6Jf4tGyedPIyki821qIfxT+7aQdf4lHfbXXL+2kK4ja6OoJoZOXNOwjIskgEHv+c84dQs/M9LDWMf/yM7j1slERqkhExF+BCP/vXnx62OtISTE6p2hMVUSSg4Z9REQCSOEvIhJACn8RkQBS+IuIBJDCX0QkgBT+IiIBpPAXEQkghb+ISACZcx16QFbMmdk+4J0OLp4DlEewHD8lS1+SpR+gvsQr9SVkkHMut3ljwoR/OMys2DlX5HcdkZAsfUmWfoD6Eq/Ul5PTsI+ISAAp/EVEAigo4f+A3wVEULL0JVn6AepLvFJfTiIQY/4iItJUUPb8RUSkEYW/iEgAJXX4m9kMM9tkZiVmNtfvelpiZr81szIzW9uorZeZLTSzLd7Pno3em+f1Z5OZTW/U/hEze8t771cW4+dNmtkAM1tsZhvMbJ2ZfS2B+9LZzFaY2RqvLz9M1L40qiPVzN4ws78ncl/MbLtXw2ozK07wvvQws7+a2Ubv/83EmPbFOZeUf4BUYCswBEgH1gAj/a6rhTrPBT4MrG3U9jNgrvd6LvBT7/VIrx8ZwGCvf6neeyuAiYAB/wAuinE/8oEPe6+7AZu9ehOxLwZ09V6nAcuBsxOxL4369A3gz8DfE/XfmFfDdiCnWVui9uVh4D+91+lAj1j2Jeb/CGP4i50IvNBoeh4wz++6Wqm1kKbhvwnI917nA5ta6gPwgtfPfGBjo/argV/73KengAsTvS9AJrAKOCtR+wL0BxYB5/NB+CdqX7ZzYvgnXF+AbGAb3kk3fvQlmYd9CoCdjaZLvbZE0Mc5twfA+5nntbfWpwLvdfN2X5hZITCe0B5zQvbFGyZZDZQBC51zCdsX4C7g20B9o7ZE7YsDXjSzlWY2x2tLxL4MAfYBv/OG4x4ysyxi2JdkDv+Wxr0S/bzW1voUN301s67A34CvO+cOn2zWFtripi/OuTrn3DhCe80TzGz0SWaP276Y2aVAmXNuZVsXaaEtLvrimeSc+zBwEXC9mZ17knnjuS+dCA333u+cGw8cJTTM05qI9yWZw78UGNBouj+w26da2muvmeUDeD/LvPbW+lTqvW7eHlNmlkYo+P/knHvca07IvhznnDsILAFmkJh9mQR83My2A48C55vZH0nMvuCc2+39LAOeACaQmH0pBUq9b5QAfyX0YRCzviRz+L8ODDezwWaWDswCnva5prZ6GrjWe30tofHz4+2zzCzDzAYDw4EV3tfDI2Z2tnekf3ajZWLC2+5vgA3OuV80eisR+5JrZj28112AC4CNJGBfnHPznHP9nXOFhP4PvOyc+2wi9sXMssys2/HXwMeAtSRgX5xz7wI7zew0r2kasJ5Y9iXWB2xifFDlYkJnnWwFbvG7nlZqfATYA9QQ+hS/DuhN6ADdFu9nr0bz3+L1ZxONjuoDRYT+I2wF7qHZgaQY9GMyoa+bbwKrvT8XJ2hfxgBveH1ZC/y3155wfWnWr/P44IBvwvWF0Dj5Gu/PuuP/pxOxL14N44Bi79/Zk0DPWPZFt3cQEQmgZB72ERGRVij8RUQCSOEvIhJACn8RkQBS+IuIBJDCX0QkgBT+IiIB9H8Ts6DUYg9tCwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "logging.info('==== train & evaluate ====')\n",
    "episode_rewards = []\n",
    "for generation in itertools.count():\n",
    "    agent.train(reward_clipped_env)\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('evaluate generation %d: reward = %.2f, steps = %d',\n",
    "            generation, episode_reward, elapsed_steps)\n",
    "    if np.mean(episode_rewards[-10:]) > env.spec.reward_threshold:\n",
    "        break\n",
    "plt.plot(episode_rewards)\n",
    "\n",
    "\n",
    "logging.info('==== test ====')\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
